#!/bin/bash
#===============================================================================
# Copyright 2001-2019 Intel Corporation.
#
# This software and the related documents are Intel copyrighted  materials,  and
# your use of  them is  governed by the  express license  under which  they were
# provided to you (License).  Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute,  disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents  are provided as  is,  with no express
# or implied  warranties,  other  than those  that are  expressly stated  in the
# License.
#===============================================================================

#
echo "This is a SAMPLE run script.  Change it to reflect the correct number"
echo "of CPUs/threads, number of nodes, MPI processes per node, etc.."

# Set total number of MPI processes for the HPL (should be equal to PxQ).
export MPI_PROC_NUM=2

# Set the MPI per node to each node.
# MPI_PER_NODE should be equal to 1 or number of sockets in the system. Otherwise,
# the HPL performance will be low. 
# MPI_PER_NODE is same as -perhost or -ppn paramaters in mpirun/mpiexec
export MPI_PER_NODE=2

#
# You can find description of all Intel(R) MPI parameters in the
# Intel(R) MPI Reference Manual.
export I_MPI_DAPL_DIRECT_COPY_THRESHOLD=655360

#         "export I_MPI_PIN_CELL=core"
#         You can use this variable (beginning Intel(R) MPI 4.0.1 for cases if HT is enabled. 
#         The variable forces to pin MPI processes and threads to real cores, 
#         so logical processors will not be involved.
#         It can be used together with the variable below, when Hydra Process Manager:
#         "export I_MPI_PIN_DOMAIN=auto" This allows uniform distribution of
#	      the processes and thread domains

# export I_MPI_EAGER_THRESHOLD=128000
#          This setting may give 1-2% of performance increase over the
#          default value of 262000 for large problems and high number of cores

export OUT=xhpl_intel64_static_outputs.txt
export HPL_EXE=xhpl_intel64_static

echo -n "This run was done on: "
date

# Capture some meaningful data for future reference:
echo -n "This run was done on: " >> $OUT
date >> $OUT
echo "HPL.dat: " >> $OUT
cat HPL.dat >> $OUT
echo "Binary name: " >> $OUT
ls -l xhpl_intel64_static >> $OUT
echo "This script: " >> $OUT
cat runme_intel64_static >> $OUT
echo "Environment variables: " >> $OUT
env >> $OUT
echo "Actual run: " >> $OUT

# Environment variables can also be also be set on the Intel(R) MPI command line
# using the -genv option (to appear before the -np 1):

# mpirun -np ${MPI_PROC_NUM} ./runme_intel64_prv "$@" | tee -a $OUT

# In case of multiple nodes involved, please set the number of MPI processes
# per node (ppn=1,2 typically) through the -perhost option (because the
# default is all cores):

mpirun -perhost ${MPI_PER_NODE} -np ${MPI_PROC_NUM} ./runme_intel64_prv "$@" | tee -a $OUT

echo -n "Done: " >> $OUT
date >> $OUT

echo -n "Done: "
date
