| #!/bin/bash |
| # |
| # chkconfig: 345 90 10 |
| # description: SLURM is a simple resource management system which \ |
| # manages exclusive access to a set of compute \ |
| # resources and distributes work to those resources. |
| # |
| # processname: @sbindir@/slurmd |
| # pidfile: /var/run/slurmd.pid |
| # |
| # processname: @sbindir@/slurmctld |
| # pidfile: /var/run/slurmctld.pid |
| # |
| # config: /etc/sysconfig/slurm |
| # |
| ### BEGIN INIT INFO |
| # Provides: slurm |
| # Required-Start: $remote_fs $syslog $network munge |
| # Required-Stop: $remote_fs $syslog $network munge |
| # Should-Start: $named |
| # Should-Stop: $named |
| # Default-Start: 2 3 4 5 |
| # Default-Stop: 0 1 6 |
| # Short-Description: slurm daemon management |
| # Description: Start slurm to provide resource management |
| ### END INIT INFO |
| |
# Installation directories; the @...@ placeholders are filled in when the
# script is generated.
BINDIR="@bindir@"
CONFDIR="@sysconfdir@"
LIBDIR="@libdir@"
SBINDIR="@sbindir@"

# Load the distribution's init-script helper library.
#  - If /etc/rc.status exists it is sourced, SUSE is set to 1 and daemons are
#    launched through "startproc".
#  - Otherwise /etc/rc.d/init.d/functions is sourced, SUSE is set to 0,
#    daemons are launched through "daemon", and minimal rc_status/rc_exit
#    stand-ins are defined that simply track the exit code in RETVAL.
#  - If neither file exists the script gives up.
if [ -f /etc/rc.status ]; then
    . /etc/rc.status
    SUSE=1
    STARTPROC=startproc

    rc_reset
elif [ -f /etc/rc.d/init.d/functions ]; then
    . /etc/rc.d/init.d/functions
    SUSE=0
    STARTPROC=daemon

    # Record the exit status of the command that ran just before the call.
    rc_status() { RETVAL=$?; }
    # Leave the script with the last recorded status.
    rc_exit() { exit $RETVAL; }
    RETVAL=0
else
    echo "Could not find /etc/rc.d/init.d/functions. Is some other daemon launch mechanism used?"
    exit 1
fi

# A starter program can not be used on Blue Gene systems without losing
# environment variables that are critical there, so the daemons are run
# directly when the Blue Gene directory is present.
if [[ -d /bgl/BlueLight/ppcfloor ]]; then
    STARTPROC=""
fi

# Slurm specific configuration.
# /etc/sysconfig/slurm can be used to alter limits for users' jobs or to set
# daemon options. For example, the limits for user jobs could be higher or
# lower than the default limits for user root (e.g. "ulimit -t unlimited"
# sets an unlimited CPU time limit for spawned user jobs).
#   SLURMCTLD_OPTIONS  slurmctld command line options, see "man slurmctld"
#   SLURMD_OPTIONS     slurmd command line options, see "man slurmd"
# When the file is absent both option variables are reset to empty.
if [ ! -f /etc/sysconfig/slurm ]; then
    SLURMCTLD_OPTIONS=""
    SLURMD_OPTIONS=""
else
    . /etc/sysconfig/slurm
fi

# Refuse to continue without the scontrol binary ...
if [ ! -x $BINDIR/scontrol ]; then
    echo "Could not find $BINDIR/scontrol. Bad path?"
    exit 1
fi

# ... or without the slurm configuration file.
if [ ! -f $CONFDIR/slurm.conf ]; then
    echo "Could not find $CONFDIR/slurm.conf. Bad path?"
    exit 1
fi

# Put the slurm library directory first on the library search path (slurm and
# munge support); an existing LD_LIBRARY_PATH is kept after it.
LD_LIBRARY_PATH=${LIBDIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
export LD_LIBRARY_PATH
| |
#
# start PROG [ARG...]
#
# Launch the daemon $SBINDIR/PROG with the given arguments, going through the
# starter program named by STARTPROC when one is set.
#
# Globals:   STARTPROC, SBINDIR (read)
# Arguments: $1   - daemon name relative to SBINDIR
#            $2.. - command line arguments handed to the daemon unchanged
# Outputs:   "starting PROG: " progress line on stdout
# Side effects: unsets HOME, MAIL, USER and USERNAME in the current shell,
#            reports the launch result through rc_status and touches
#            /var/lock/subsys/slurm.
#
start() {
    local prog=$1
    shift
    echo -n "starting $prog: "
    unset HOME MAIL USER USERNAME

    # STARTPROC is intentionally left unquoted: it may be empty, in which
    # case it has to disappear from the command line entirely. The daemon
    # arguments are forwarded with "$@" so that each one reaches the daemon
    # exactly as the caller passed it (no re-splitting, no glob expansion).
    $STARTPROC "$SBINDIR/$prog" "$@"
    rc_status -v
    echo
    touch /var/lock/subsys/slurm
}
| |
#
# stop PROG
#
# Ask the daemon PROG to terminate by sending it SIGTERM through killproc,
# report the result through rc_status and drop the subsystem lock file.
# Does not wait for the daemon to actually exit.
#
stop() {
    local sig=-TERM

    echo -n "stopping $1: "
    killproc $1 $sig
    rc_status -v
    echo ""
    rm -f /var/lock/subsys/slurm
}
| |
#
# startall
#
# Start every daemon that "scontrol show daemons" lists for this node.
# The options for a daemon come from the variable <DAEMON>_OPTIONS (daemon
# name in upper case, e.g. SLURMD_OPTIONS). When MULTIPLE_SLURMD is "yes",
# one slurmd is started per name printed by "scontrol show aliases", each
# with "-N <name>" in front of the configured options.
#
startall() {
    for prog in $($BINDIR/scontrol show daemons); do
        # Name of the variable holding this daemon's options.
        optvar=$(echo ${prog}_OPTIONS | tr "a-z" "A-Z")

        # Common case: a single instance of the daemon.
        if [[ ${MULTIPLE_SLURMD} != yes || ${prog} != slurmd ]]; then
            start $prog ${!optvar}
            continue
        fi

        # Multiple slurmd mode: one instance per alias.
        for node in $($BINDIR/scontrol show aliases); do
            start $prog -N ${node} ${!optvar}
        done
    done
}
| |
#
# slurmstatus PROG
#
# status() with slight modifications to take into account instantiations of
# job manager slurmd's, which should not be counted as "running": a daemon
# only counts as running when the pid recorded in its pid file is among the
# pids reported by pidof (slurmctld is additionally accepted on the pidof
# result alone).
#
# Globals:   CONFDIR, BINDIR (read); RETVAL (written)
# Arguments: $1 - daemon name, optionally with a leading path
# Outputs:   one status line per pid file examined, on stdout
# Returns:   0 - running
#            1 - a pid file with a pid exists but no such process was found
#            3 - stopped
#            (with several pid files, the first non-zero value is kept)
#
slurmstatus() {
    local base=${1##*/}
    local pid
    local rpid
    local pidfile
    local pidfiles
    local rc
    local i
    local n

    # Take the pid file location from the non-comment "<daemon>PidFile" line
    # of slurm.conf: keep what follows '=', drop a trailing comment and any
    # double quotes. Fall back to /var/run/<daemon>.pid when there is no such
    # line. ($? below is the status of the last grep of the pipeline.)
    pidfile=$(grep -i "${base}pid" "$CONFDIR/slurm.conf" | grep -v '^ *#')
    if [ $? = 0 ]; then
        pidfile=${pidfile##*=}
        pidfile=${pidfile%#*}
        pidfile=${pidfile//\"/}
    else
        pidfile=/var/run/${base}.pid
    fi

    # Pids of the daemon, leaving out this script ($$) and its parent
    # ($PPID); "%PPID" is handed to pidof literally for it to interpret
    # (see pidof(8)). Try the name as given first, then the bare base name.
    pid=$(pidof -o $$ -o $PPID -o %PPID -x "$1" ||
          pidof -o $$ -o $PPID -o %PPID -x "$base")

    # A slurmd pid file name containing "%n" stands for one file per name
    # printed by "scontrol show aliases"; expand it into that list.
    if [[ $base == slurmd && $pidfile == *%n* ]]; then
        for n in $("$BINDIR"/scontrol show aliases); do
            pidfiles="${pidfiles} $(echo ${pidfile} | sed "s/%n/$n/g")"
        done
    else
        pidfiles=${pidfile}
    fi

    RETVAL=0
    # Unquoted on purpose: the list is whitespace separated and a value taken
    # from slurm.conf may carry trailing blanks.
    for pidfile in ${pidfiles}; do
        rc=1
        if [ -f "$pidfile" ]; then
            read -r rpid < "$pidfile"
            if [[ -n $rpid && -n $pid ]]; then
                for i in $pid; do
                    if [[ $i == "$rpid" ]]; then
                        echo $"${base} (pid $rpid) is running..."
                        rc=0
                        break
                    fi
                done
            elif [[ -n $rpid ]]; then
                # Due to change in user id, pid file may persist
                # after slurmctld terminates
                if [[ $base != slurmctld ]]; then
                    echo $"${base} dead but pid file exists"
                else
                    echo $"${base} is stopped"
                fi
                RETVAL=1
                # The state of this pid file has been reported; do not fall
                # through and print a second "is stopped" line for it.
                continue
            fi
        fi

        if [[ $rc -eq 0 ]]; then
            continue
        fi

        # slurmctld is accepted as running on the pidof result alone.
        if [[ $base == slurmctld && -n $pid ]]; then
            echo $"${base} (pid $pid) is running..."
            continue
        fi

        echo $"${base} is stopped"
        if [[ $RETVAL == 0 ]]; then
            RETVAL=3
        fi
    done

    return $RETVAL
}
| |
#
# slurmstop
#
# Stop every daemon that "scontrol show daemons" lists for this node and wait
# for each one to terminate: after the stop request the daemon is polled with
# slurmstatus after 1, 2, 3 and 4 seconds (up to 10 seconds in total) until
# it is reported as not running.
#
# Globals:   BINDIR (read); RETVAL (written)
# Outputs:   the progress lines of stop and slurmstatus
# Result:    RETVAL is 0 when every daemon ended up stopped (also when there
#            was nothing to stop) and 1 when at least one daemon was still
#            running after the wait.
#
slurmstop() {
    local prog
    local i
    local status
    local failed=0

    for prog in $("$BINDIR"/scontrol show daemons); do
        stop $prog

        status=0
        for i in 1 2 3 4; do
            sleep $i
            slurmstatus $prog
            status=$?
            if [ $status != 0 ]; then
                break
            fi
        done

        # slurmstatus returns 1 or 3 in case of a stopped daemon, and that
        # is what we are looking for here. Anything else means this daemon
        # did not go away; remember that so a later daemon that does stop
        # cannot mask the failure.
        case $status in
            1|3) ;;
            *) failed=1 ;;
        esac
    done

    RETVAL=$failed
}
| |
#
# Command dispatch.
#
# The pathname substitution in the daemon command assumes that prefix and
# exec_prefix are the same. This is the default, unless the user requests
# otherwise.
#
# Any node can be a slurm controller and/or server; the daemons handled on
# this node are the ones printed by "scontrol show daemons".
#
case "$1" in
    start)
        startall
        ;;
    startclean)
        # Same as start, with "-c" put in front of each daemon's options.
        SLURMCTLD_OPTIONS="-c $SLURMCTLD_OPTIONS"
        SLURMD_OPTIONS="-c $SLURMD_OPTIONS"
        startall
        ;;
    stop)
        slurmstop
        ;;
    status)
        # Report every daemon; the exit status is that of the last daemon
        # found not to be running (0 when all are running).
        anystop=0
        for prog in $("$BINDIR"/scontrol show daemons); do
            slurmstatus $prog
            rc=$?
            if [ $rc != 0 ]; then
                anystop=$rc
            fi
        done
        RETVAL=$anystop
        ;;
    restart)
        $0 stop
        $0 start
        # Record the outcome of the start so that it becomes the exit status
        # of the restart instead of being dropped.
        rc_status
        ;;
    condrestart)
        # Restart only if the subsystem lock file says slurm was started.
        if [ -f /var/lock/subsys/slurm ]; then
            for prog in $("$BINDIR"/scontrol show daemons); do
                stop $prog
                sleep 1
                optvar=$(echo ${prog}_OPTIONS | tr "a-z" "A-Z")
                if [[ ${MULTIPLE_SLURMD} == yes ]] && [[ ${prog} == slurmd ]]; then
                    for node in $("$BINDIR"/scontrol show aliases); do
                        # Pass the configured options here as well, exactly
                        # as a plain "start" does for multiple slurmd's.
                        start $prog -N ${node} ${!optvar}
                    done
                else
                    start $prog ${!optvar}
                fi
            done
        fi
        ;;
    reconfig|reload)
        # Have each daemon re-read its configuration by sending it SIGHUP.
        for prog in $("$BINDIR"/scontrol show daemons); do
            echo -n $"Reloading $prog daemon configuration: "
            killproc $prog -HUP
            echo
        done
        ;;
    test)
        for prog in $("$BINDIR"/scontrol show daemons); do
            echo "$prog runs here"
        done
        ;;
    *)
        echo "Usage: $0 {start|startclean|stop|status|restart|reconfig|condrestart|test}"
        exit 1
        ;;
esac

rc_exit