blob: 2d6c1789a9acfafccdca2ff8e55573d8cdf213c9 [file] [log] [blame]
#! /usr/bin/perl -w
###############################################################################
#
# mpiexec - wrapper script for mpiexec to run srun instead.
#
#
###############################################################################
# Copyright (C) 2007 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Danny Auble <auble1@llnl.gov>.
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# In addition, as a special exception, the copyright holders give permission
# to link the code of portions of this program with the OpenSSL library under
# certain conditions as described in each individual source file, and
# distribute linked combinations including the two. You must obey the GNU
# General Public License in all respects for all of the code used other than
# OpenSSL. If you modify file(s) with this exception, you may extend this
# exception to your version of the file(s), but you are not obligated to do
# so. If you do not wish to do so, delete this exception statement from your
# version. If you delete this exception statement from all source files in
# the program, then also delete it here.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
###############################################################################
use strict;
use FindBin;
use Getopt::Long 2.24 qw(:config no_ignore_case require_order);
use lib "${FindBin::Bin}/../lib/perl";
use autouse 'Pod::Usage' => qw(pod2usage);
# Path of the srun binary; it is looked up in the directory this script
# was started from.
my $srun = $FindBin::Bin . "/srun";

# Destinations for the command line options handled by GetOptions.
# Task count, target host and verbosity level.
my ($nprocs, $hostname, $verbose);
# Standard stream handling.
my ($nostdin, $allstdin, $nostdout);
# Task layout and config file driven launches.
my ($pernode, $config_file);
# Options that are accepted on the command line but have no srun equivalent.
my ($perif, $no_shem, $gige, $kill_it, $tv);
# Miscellaneous.
my ($timeout, $help, $man);
# get_new_config - convert an mpiexec config file into an srun --multi-prog
# config file.
#
# Reads the file named by the file-scoped $config_file ("-" selects standard
# input) and writes a newly created temporary file under /tmp.  Every input
# line of the form
#     -n COUNT : executable [args]
# becomes one output line "FIRST-LAST<TAB>executable [args]", where the task
# ranks FIRST..LAST are handed out consecutively starting at 0.  Lines that
# have no ":" separator, nothing on one side of it, or a "#" before it are
# skipped.  Lines whose option part contains no "-n COUNT" only produce a
# notice on standard output.
#
# Returns: (path of the generated file, total number of tasks).  The caller
#          is expected to remove the generated file when done with it.
# Dies:    when the config file cannot be opened, the temporary file cannot
#          be created, or the output cannot be flushed to disk.
sub get_new_config {
    # Loaded here so the sub brings its own dependency with it.
    require File::Temp;

    # Last path component of the config file, used to label the temp file.
    my $base = (split(/\//, $config_file))[-1];
    $base = "mpiexec" unless defined $base && length $base;

    # Three-argument open: the file name can never be taken for a mode or a
    # pipe.  "-" is handled explicitly to keep "read from stdin" working.
    my $in;
    my $close_in = 0;
    if ($config_file eq "-") {
        $in = \*STDIN;
    } else {
        open($in, '<', $config_file)
            or die "Can't open $config_file: $!";
        $close_in = 1;
    }

    # An unpredictable, exclusively created file instead of a fixed name in
    # a world-writable directory.
    my ($out, $new_config) = File::Temp::tempfile(
        "$base.XXXXXX",
        DIR    => "/tmp",
        SUFFIX => ".slurm",
        UNLINK => 0,
    );

    my $task_cnt = 0;
    while (my $line = <$in>) {
        # Limit of 2: only the first ":" separates options from the program,
        # so colons inside the program arguments are kept.
        my ($opts, $prog) = split(/:/, $line, 2);
        next if !$opts || !$prog || index($opts, "#") >= 0;

        if ($opts =~ /-n *(\d+)/) {
            my $end_cnt = $task_cnt + $1 - 1;
            # A final line without a newline must still end its record.
            $prog .= "\n" unless $prog =~ /\n\z/;
            print {$out} "$task_cnt-$end_cnt\t$prog";
            $task_cnt = $end_cnt + 1;
        } else {
            print "We don't have support for hostname task layout in a config file right now.\nPlease use srun with the -m arbitrary mode to layout tasks on specific nodes.\n";
        }
    }

    # Buffered write errors only show up at close time.
    close($out) or die "Can't write $new_config: $!";
    close($in) if $close_in;    # never close the caller's STDIN

    return ($new_config, $task_cnt);
}
# Main program: parse the mpiexec-style command line, translate it into an
# srun command line, run srun and exit with its status.
GetOptions('n=i'          => \$nprocs,
           'host=s'       => \$hostname,
           'verbose+'     => \$verbose,
           'nostdin'      => \$nostdin,
           'allstdin'     => \$allstdin,
           'nostdout'     => \$nostdout,
           'pernode'      => \$pernode,
           'perif'        => \$perif,       # n/a
           'no-shmem'     => \$no_shem,     # n/a
           'gige'         => \$gige,        # n/a
           'kill'         => \$kill_it,     # n/a
           'tv|totalview' => \$tv,          # n/a
           'config=s'     => \$config_file,
           'help|?'       => \$help,
           'man'          => \$man
    ) or pod2usage(2);

# Display usage if necessary
pod2usage(0) if $help;
if ($man) {
    if ($< == 0) {                      # Cannot invoke perldoc as root
        my $id = eval { getpwnam("nobody") };
        $id = eval { getpwnam("nouser") } unless defined $id;
        $id = -2 unless defined $id;
        $< = $id;
    }
    $> = $<;                            # Disengage setuid
    $ENV{PATH} = "/bin:/usr/bin";       # Untaint PATH
    delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
    if ($0 =~ /^([-\/\w\.]+)$/) { $0 = $1; }    # Untaint $0
    else { die "Illegal characters were found in \$0 ($0)\n"; }
    pod2usage(-exitstatus => 0, -verbose => 2);
}

# Something to run is required: either an executable on the command line or
# a config file that names the programs.
if ((!@ARGV || $ARGV[0] eq "") && !$config_file) {
    pod2usage(2);
}

my $new_config;
my @command = ($srun);

# system() is called in list form below, so no shell splits or unquotes the
# words: every option and every option value must be its own list element.

# write stdout and err to files instead of stdout
push @command, "-o", "job.o%j", "-e", "job.e%j" if $nostdout;

# Exactly one stdin mode is passed; the default is to feed only task 0.
if ($nostdin) {
    push @command, "-inone";
} elsif (!$allstdin) {
    push @command, "-i0";
}

push @command, "-n$nprocs" if $nprocs;          # number of tasks
push @command, "-w$hostname" if $hostname;      # Hostlist provided
push @command, "--ntasks-per-node=1" if $pernode;   # one task per node

# NOTE(review): the value is handed to "srun -t" unchanged; confirm that the
# unit users put in MPIEXEC_TIMEOUT matches what srun expects.
push @command, "-t", $ENV{"MPIEXEC_TIMEOUT"} if $ENV{"MPIEXEC_TIMEOUT"};

# One "v" per -verbose given on the command line.
push @command, "-" . ("v" x $verbose) if $verbose;

if ($config_file) {
    ($new_config, my $new_nprocs) = get_new_config();
    push @command, "-n$new_nprocs" if !$nprocs;
    # The generated file follows --multi-prog as a separate word.
    push @command, "--multi-prog", $new_config;
} else {
    push @command, @ARGV;
}

#print "@command\n";
my $status   = system(@command);
my $exec_err = $!;      # saved before unlink can overwrite it

# Remove the generated config directly; no shell sees the file name.
unlink($new_config) if defined $new_config;

if ($status == -1) {
    # srun could not be started at all.
    print STDERR "Failed to run $srun: $exec_err\n";
    exit(255);
}
if ($status & 127) {
    # srun was killed by a signal: report failure the way a shell would
    # instead of exiting with 0.
    exit(128 + ($status & 127));
}
exit($status >> 8);
__END__
=head1 NAME
B<mpiexec.slurm> - Run an MPI program under Slurm
=head1 SYNOPSIS
mpiexec.slurm args executable pgmargs
where args are command line arguments for mpiexec (see below), executable is
the name of the executable and pgmargs are command line arguments for the
executable. For example the following command will run the MPI program a.out on
4 processes:
mpiexec.slurm -n 4 a.out
mpiexec.slurm supports the following options:
[-n nprocs]
[-host hostname]
[-verbose]
[-nostdin]
[-allstdin]
[-nostdout]
[-pernode]
[-config config_file]
[-help|-?]
[-man]
=head1 DESCRIPTION
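B<mpiexec.slurm> is a wrapper script that accepts mpiexec command line
arguments and runs the given program with srun instead.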
=head1 OPTIONS
=over 4
=item B<-n <np>>
Specify the number of processes to use
=item B<-host hostname>
Name of host on which to run processes
=item B<-verbose>
Increase the verbosity of mpiexec.slurm informational messages. Multiple
-verbose's will further increase mpiexec.slurm's verbosity. By default only
errors will be displayed.
=item B<-nostdin>
Do not connect the standard input stream of process 0 to the mpiexec process.
If the process attempts to read from stdin, it will see an end-of-file.
=item B<-allstdin>
Send the standard input stream of mpiexec.slurm to all processes. Each
character typed to mpiexec (or read from a file) is duplicated numproc times,
and sent to each process. This permits every process to read, for example,
configuration information from the input stream.
=item B<-nostdout>
Do not connect the standard output and error streams of each process back to
the mpiexec.slurm process. Standard output and error will be respectively
written in files of the form job.ojobid and job.ejobid for batch jobs, and
directly to the controlling terminal for interactive jobs.
=item B<-pernode>
Allocate only one process per compute node. For SMP nodes, only one processor
will be allocated a job. This flag is used to implement multiple level
parallelism with MPI between nodes, and threads within a node, assuming the
code is set up to do that.
=item B<-config <config_file>>
Process executable and arguments are specified in the given configuration file.
This flag permits the use of heterogeneous jobs using multiple executables. No
executable is given on the command line when using the -config flag. If
config_file is "-", then the configuration is read from standard input. In this
case the flag -nostdin is mandatory, as it is not possible to separate the
contents of the configuration file from process input. The config_file can
contain lines beginning with "#", which are treated as comments and ignored,
and one or more lines with the following format:
-n XX : executable [args]
where XX is the number of processes to be used, executable is the name of the
program to run and args are its arguments. For example:
# Sample mpiexec config file
# Launch two instances of foo
-n 2 : foo
# and three instances of bar
-n 3 : bar
There is no support for hostname task layout in a config file at the moment.
=item B<-help|-?>
Display a brief help page
=back
=cut