| #!/bin/bash |
| # ************************************************************************** |
| # Function: Wrapper that helps launching Intel MPI jobs within SLURM |
| # using MICs in native mode. |
| # mpiexec.hydra needs passwordless ssh access to all involved nodes |
| # Version: 0.4 |
| #--------------------------------------------------------------------------- |
| # 11.10.2013 Created by Chrysovalantis Paschoulas, Juelich Supercomputing Centre - Forschungszentrum Juelich |
| # Initial Script by (C) Olli-Pekka Lehto - CSC IT Center for Science Ltd. |
| # ************************************************************************** |
| |
# Usage message
# Help text shown for -h, for invalid invocations and when no binary is given.
# The string is double-quoted on purpose: $(basename "$0") expands to the name
# under which the wrapper was started, and literal double quotes inside the
# text are therefore backslash-escaped.
# The synopsis lists every option handled by the option parser, including the
# long options --tv and --tvcli.
USAGE="
USAGE
$(basename "$0") [ [-h] | [-v] [--tv | --tvcli] [-x <host num tasks> -c <host binary>] [-z <mic num tasks> -m <mic binary>] ]

OPTIONS
-h Print this message.
-c Binary that will run on host nodes. If it is not set then only the MICs will be used.
-m Binary that will run inside the MICs.
-x Number of tasks (MPI ranks) for the host nodes. Default value is 1.
-z Number of tasks (MPI ranks) for the MICs. Default value is 1.
-v Show more info for this script.
--tv Run using TotalView (equivalent to export MPIEXEC_PREFIX=\"totalview -args\").
--tvcli Run using TotalView cli (equivalent to export MPIEXEC_PREFIX=\"totalviewcli -args\")

MORE INFO
The user MUST export the following environment variables:
MIC_NUM_PER_HOST Number of MICs on each host that will be used by mpiexec. Available options: 0, 1, 2. Default 2.
OMP_NUM_THREADS OpenMP threads number per task on hosts. This MUST be exported when OpenMP is used!
MIC_OMP_NUM_THREADS OpenMP threads number per task on MICs. If not defined then is set same as OMP_NUM_THREADS.

Also the user MAY pass additional flags to mpiexec exporting the following env vars:
MPIEXEC_PREFIX Wrap the execution of mpiexec with another tool (e.g. totalview).
MPIEXEC_FLAGS_HOST Flags that will be passed to the hosts.
MPIEXEC_FLAGS_MIC Flags that will be passed to the MICs.
-- Examples:
export MPIEXEC_PREFIX=\"totalview -args\"
export MPIEXEC_PREFIX=\"totalviewcli -args\"
export MPIEXEC_FLAGS_HOST=\"-env VAR VALUE\"
export MPIEXEC_FLAGS_MIC=\"-envlist VAR1,VAR2\"

EXAMPLES
Batch Script1 - Only hosts:
---
#!/bin/bash
#SBATCH -J TestJobMICNativeHybrid
#SBATCH -N 4
#SBATCH -p q_mics
#SBATCH -o TestJob-%j.out
#SBATCH -e TestJob-%j.err
#SBATCH --time=30

module purge
module load impi intel/13.1.3

export MIC_NUM_PER_HOST=0
export OMP_NUM_THREADS=32

mpirun-mic -x 1 -c ./impi_native_hybrid
---

Batch Script2 - Only mics:
---
#!/bin/bash
#SBATCH -J TestJobMICNativeHybrid
#SBATCH -N 4
#SBATCH -p q_mics
#SBATCH -o TestJob-%j.out
#SBATCH -e TestJob-%j.err
#SBATCH --time=30

module purge
module load impi intel/13.1.3

export MIC_NUM_PER_HOST=2
export MIC_OMP_NUM_THREADS=240

mpirun-mic -z 1 -m ./impi_native_hybrid.mic
---

Batch Script3 - Hosts and MICs:
---
#!/bin/bash
#SBATCH -J TestJobMICNativeHybrid
#SBATCH -N 2
#SBATCH -p q_mics
#SBATCH -o TestJob-%j.out
#SBATCH -e TestJob-%j.err
#SBATCH --time=30

module purge
module load impi intel/13.1.3

export MIC_NUM_PER_HOST=2
export OMP_NUM_THREADS=2
export MIC_OMP_NUM_THREADS=4

mpirun-mic -v -x 16 -c ./impi_native_hybrid -z 60 -m ./impi_native_hybrid.mic
---
"
| |
# check script arguments
# Called without any argument there is nothing to launch: print the help text
# on stderr and leave with an error status.
(( $# >= 1 )) || {
  echo "$USAGE" >&2
  exit 1
}
| |
# get script arguments
#######################################
# Parse the command line options of the wrapper.
# Globals:
#   HOST_BINARY, MIC_BINARY  (written) binaries given with -c / -m
#   HOST_PPN, MIC_PPN        (written) task counts given with -x / -z
#   MPIRUN_MIC_VERBOSE       (written) set to 1 by -v
#   MPIEXEC_PREFIX           (written) set by --tv / --tvcli
#   USAGE                    (read)    help text
# Arguments:
#   The command line of the script ("$@").
# Outputs:
#   Help text on stdout for -h; help text on stderr for any invalid option.
# Returns:
#   Does not return for -h (exit 0) or for an invalid option (exit 1).
#######################################
parse_args() {
  local OPTION OPTARG
  local OPTIND=1

  while getopts "vhc:m:x:z:-:" OPTION; do
    case "$OPTION" in
      c)
        HOST_BINARY=$OPTARG
        ;;
      h)
        echo "$USAGE"
        exit 0
        ;;
      m)
        MIC_BINARY=$OPTARG
        ;;
      v)
        MPIRUN_MIC_VERBOSE=1
        ;;
      x)
        HOST_PPN=$OPTARG
        ;;
      z)
        MIC_PPN=$OPTARG
        ;;
      -)
        # getopts only knows short options. The "-:" entry of the option
        # string makes "--name" arrive as option "-" with OPTARG "name".
        case "$OPTARG" in
          tv)
            MPIEXEC_PREFIX="totalview -args"
            ;;
          tvcli)
            MPIEXEC_PREFIX="totalviewcli -args"
            ;;
          *)
            # Any other long option is an error; a "\?" pattern here would
            # only match a literal question mark and let typos slip through.
            echo "$USAGE" >&2
            exit 1
            ;;
        esac
        ;;
      \?)
        # Unknown short option or missing option argument (getopts has
        # already printed its own diagnostic on stderr).
        echo "$USAGE" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
| |
| |
### prepare the environment
# Outside of SLURM the job runs on the local system only (which still has to
# be a compute node): act as task 0 of a job that consists of this host.
: "${SLURM_PROCID:=0}"
: "${SLURM_NODELIST:=$(hostname)}"

# give default values
# One MPI task per MIC and per host unless -z / -x said otherwise.
: "${MIC_PPN:=1}"
: "${HOST_PPN:=1}"

# Use two MICs per host unless the user exported another value.
: "${MIC_NUM_PER_HOST:=2}"
| |
| |
# OMP_NUM_THREADS decides whether this is a hybrid MPI+OpenMP run. When it is
# set and the user gave no separate thread count for the MICs, the MIC tasks
# use the same number of threads as the host tasks.
if [[ -n "$OMP_NUM_THREADS" && -z "$MIC_OMP_NUM_THREADS" ]]; then
  MIC_OMP_NUM_THREADS=$OMP_NUM_THREADS
fi
| |
# check the important values
# At least one binary (for the hosts or for the MICs) has to be given;
# with neither of them there is nothing to run.
if [[ -z "${HOST_BINARY}${MIC_BINARY}" ]]; then
  echo "$USAGE" >&2
  exit 1
fi
| |
# create the command line
# Launcher that starts the job (alternative, currently disabled: mpirun).
MPI_EXEC="mpiexec.hydra"
# Starts empty; the argument sets for hosts and MICs are appended to it later.
EXEC_ARGS=
| |
#######################################
# Succeed when a host name is an exact member of a space-separated list.
# An exact comparison is required: a substring or regex match would treat
# "node1" as a member of a list that only contains "node10".
# Arguments: $1 - space-separated list of host names
#            $2 - host name to look up
# Returns:   0 if the host is in the list, 1 otherwise
#######################################
_list_has_host() {
  [[ " $1 " == *" $2 "* ]]
}

#######################################
# Build the lists of host nodes and of MIC nodes that take part in the job.
# Globals:
#   SLURM_NODELIST         (read)    host list of the job
#   MIC_NUM_PER_HOST       (read)    1 -> use mic0, 2 -> use mic0 and mic1,
#                                    any other value -> use no MICs
#   SLIST_HOSTS_WITH_MICS  (written) comma-joined host list expression of all
#                                    nodes whose GRES column mentions "mic"
#   LLIST_HOSTS_WITH_MICS  (written) the same nodes, expanded, space-separated
#   HOST_NODELIST          (written) expanded job hosts, space-separated
#   MIC_NODELIST           (written) "<host>-micN" names, space-separated
# Arguments: none
# Outputs:   nothing on stdout; sinfo/scontrol may report errors on stderr
#######################################
build_node_lists() {
  local nodes gres host

  # create the list of the nodes that are configured to have MICs
  # Only the GRES column is inspected, so a node whose *name* contains "mic"
  # is not mistaken for a node that owns MICs. Several sinfo lines are joined
  # with commas into one host list expression, because scontrol expands a
  # single host list argument.
  SLIST_HOSTS_WITH_MICS=""
  while read -r nodes gres; do
    [[ "$gres" == *mic* ]] || continue
    SLIST_HOSTS_WITH_MICS="${SLIST_HOSTS_WITH_MICS:+${SLIST_HOSTS_WITH_MICS},}${nodes}"
  done < <(sinfo -h -o "%N %G")

  # Expand the expression only when it is not empty: according to the
  # scontrol man page, "show hostnames" without a host list falls back to the
  # node list of the current job, which would mark every job node as a MIC
  # node.
  LLIST_HOSTS_WITH_MICS=""
  if [[ -n "$SLIST_HOSTS_WITH_MICS" ]]; then
    while read -r host; do
      [[ -n "$host" ]] || continue
      LLIST_HOSTS_WITH_MICS="${LLIST_HOSTS_WITH_MICS} ${host}"
    done < <(scontrol show hostname "$SLIST_HOSTS_WITH_MICS")
  fi

  # create the lists of HOSTS AND MICS!
  HOST_NODELIST=""
  MIC_NODELIST=""
  # The host list is quoted so that an expression like "n[1-2]" is never
  # subject to pathname expansion.
  while read -r host; do
    [[ -n "$host" ]] || continue
    if _list_has_host "$LLIST_HOSTS_WITH_MICS" "$host"; then
      if [ "$MIC_NUM_PER_HOST" -eq 1 ]; then
        MIC_NODELIST="${MIC_NODELIST} ${host}-mic0"
      elif [ "$MIC_NUM_PER_HOST" -eq 2 ]; then
        MIC_NODELIST="${MIC_NODELIST} ${host}-mic0 ${host}-mic1"
      fi
    fi
    HOST_NODELIST="${HOST_NODELIST} ${host}"
  done < <(scontrol show hostname "$SLURM_NODELIST")
}

build_node_lists
| |
| |
# create the arguments
#######################################
# Append one ": <options> -n <tasks> -host <node> <binary>" argument set per
# participating node to EXEC_ARGS.
# Globals:
#   EXEC_ARGS            (read/written) argument string, extended in place
#   HOST_BINARY          (read) host binary; empty -> no host argument sets
#   MIC_BINARY           (read) MIC binary;  empty -> no MIC argument sets
#   HOST_NODELIST        (read) space-separated host names
#   MIC_NODELIST         (read) space-separated MIC names
#   HOST_PPN, MIC_PPN    (read) MPI tasks per host / per MIC
#   OMP_NUM_THREADS      (read) if set, passed to the host tasks
#   MIC_OMP_NUM_THREADS  (read) if set, passed to the MIC tasks
#   MIC_LD_LIBRARY_PATH, LD_LIBRARY_PATH (read) library path for the MIC tasks
#   MPIEXEC_FLAGS_HOST, MPIEXEC_FLAGS_MIC (read) extra user flags
# Arguments: none
# Outputs:   none
#######################################
build_exec_args() {
  local n
  local host_omp_env=""
  local mic_omp_env=""

  # args for hosts here
  # run job on hosts if host binary is not null
  if [[ -n "$HOST_BINARY" && -n "$HOST_NODELIST" ]]; then
    # with OpenMP the thread count is handed to every host task
    if [[ -n "$OMP_NUM_THREADS" ]]; then
      host_omp_env="-env OMP_NUM_THREADS $OMP_NUM_THREADS "
    fi
    for n in $HOST_NODELIST; do
      EXEC_ARGS="${EXEC_ARGS} : ${host_omp_env}${MPIEXEC_FLAGS_HOST} -n $HOST_PPN -host $n $HOST_BINARY"
    done
  fi

  # args for mics here
  # run job on mics if mic binary is not null and MIC_NUM_PER_HOST is 1 or 2
  # (MIC_NODELIST is only filled for those two values). Without the check on
  # MIC_BINARY a host-only call would emit MIC argument sets that name no
  # executable.
  if [[ -n "$MIC_BINARY" && -n "$MIC_NODELIST" ]]; then
    # with OpenMP the MIC specific thread count is handed to every MIC task
    if [[ -n "$MIC_OMP_NUM_THREADS" ]]; then
      mic_omp_env="-env OMP_NUM_THREADS $MIC_OMP_NUM_THREADS "
    fi
    for n in $MIC_NODELIST; do
      # The MIC tasks always get a library path that puts the MIC libraries
      # in front of the host ones.
      EXEC_ARGS="${EXEC_ARGS} : ${mic_omp_env}-env LD_LIBRARY_PATH ${MIC_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH} ${MPIEXEC_FLAGS_MIC} -n $MIC_PPN -host $n $MIC_BINARY"
    done
  fi
}

build_exec_args
| |
# Full command line: the launcher followed by its arguments, preceded by the
# optional wrapper tool (e.g. a debugger) when MPIEXEC_PREFIX is not empty.
RUNCMD="${MPIEXEC_PREFIX:+${MPIEXEC_PREFIX} }${MPI_EXEC} ${EXEC_ARGS}"
| |
# extra important env (Local System depended)
# Disabled alternative: export LD_LIBRARY_PATH="$MIC_LD_LIBRARY_PATH:$LD_LIBRARY_PATH"
# Settings exported to the launcher. NOTE(review): presumably I_MPI_MIC=1
# enables MIC support in Intel MPI and the provider list selects the DAPL
# provider of the local fabric - confirm against the Intel MPI reference.
export I_MPI_MIC=1 I_MPI_DAPL_PROVIDER_LIST=ofa-v2-mlx4_0-1
# Make sure these two are not inherited from the calling environment.
unset I_MPI_DEVICE I_MPI_PMI_LIBRARY
| |
# start the job
# Only the task with SLURM_PROCID 0 launches the command; for every other
# value this whole section is skipped.
if [ "$SLURM_PROCID" -eq 0 ]; then

  # Verbose mode (MPIRUN_MIC_VERBOSE is not empty): print the effective
  # settings and the command line before starting it.
  if [[ -n "$MPIRUN_MIC_VERBOSE" ]]; then
    printf '%s\n' \
      "" \
      "########################################################################" \
      "MPI Tasks per host: $HOST_PPN" \
      "Threads per host MPI task: $OMP_NUM_THREADS" \
      "Binary for the hosts: $HOST_BINARY" \
      "MPI Tasks per MIC: $MIC_PPN" \
      "Threads per MIC MPI task: $MIC_OMP_NUM_THREADS" \
      "Binary for the mics: $MIC_BINARY" \
      "MIC_NUM_PER_HOST: $MIC_NUM_PER_HOST" \
      "" \
      "MPIEXEC_PREFIX: $MPIEXEC_PREFIX" \
      "MPIEXEC_FLAGS_HOST: $MPIEXEC_FLAGS_HOST" \
      "MPIEXEC_FLAGS_MIC: $MPIEXEC_FLAGS_MIC" \
      "" \
      "Run command: " \
      "$RUNCMD" \
      "########################################################################" \
      ""
  fi

  # Deliberately unquoted: RUNCMD is one flat string that has to be split
  # into the command and its arguments.
  # shellcheck disable=SC2086
  $RUNCMD

fi
| |
| |
| |