#! /bin/bash
# blob: c4471d02fc7561b260efac0c3c006d03bd7a8943
# Copyright 2015 Princeton University Research Computing
# Input should look like this for an end record:
# $1: '-s' (the subject argument keyword)
# $2: The subject itself
# $3: The To: email address.
#
# The subject should look like this for a start record:
# Slurm Job_id=323 Name=ddt_clone Began, Queued time 00:00:01
#
# The subject should look like this for an end record:
# Slurm Job_id=327 Name=ddt_clone Ended, Run time 00:05:01, COMPLETED, ExitCode 0
# Slurm Job_id=328 Name=ddt_clone Failed, Run time 00:05:01, FAILED, ExitCode 127
# Slurm Job_id=342 Name=ddt_clone Ended, Run time 00:00:33, CANCELLED, ExitCode 0
# Not sure what to do about PENDING state resulting from a requeue request.
# Doing a seff on it for now:
# Slurm Job_id=326 Name=ddt_clone Failed, Run time 00:00:41, PENDING, ExitCode 0
#
# These end records are the only types of messages to process. They have 4 (rather
# than 2) comma-delimited arguments, of which ending status is the 3rd.
# Just pass through notifications without an ending status.
# Command that produces the job report mailed for end records.
# NOTE(review): @bindir@ looks like a build-time placeholder - confirm.
SEFF=@bindir@/seff
# Mailer, invoked as: $MAIL -s <subject> <recipient...> with the body on stdin.
MAIL=/bin/mail

#### JOB_COMPLETION_TIME ###
# The time needed for a job to complete and synchronize accounting data with
# slurmdbd. If you're running slurmctld under systemd control, executing
# `systemctl stop slurmctld` or `systemctl restart slurmctld` may hang for
# this long: while slurmctld is down, systemd still waits for all descendant
# processes (in this case the sleep executed from smail) to complete.
# The default value is 5s (half of the default MessageTimeout).
JOB_COMPLETION_TIME=5

# Get the ClusterName
ClusterName=${SLURM_CLUSTER_NAME}

#######################################
# Print the ending status carried by a notification subject.
# The status is the 3rd comma-delimited field with every space removed; the
# output is empty when the subject has fewer than three fields.
# NOTE: a comma inside the job name shifts the fields.
# Arguments: $1 - the mail subject
# Outputs:   the status (possibly empty) on stdout
#######################################
smail_end_status() {
  local -a fields=()
  # `read -a` splits on IFS without pathname expansion, and the IFS change is
  # limited to this one command.
  IFS=',' read -r -a fields <<< "$1"
  printf '%s\n' "${fields[2]// /}"
}

#######################################
# Print the job id found in a notification subject.
# Only the first comma-delimited field is examined. The "key=value" token is
# the 2nd space-separated word, or the 4th when the 2nd word is "Array"; the
# job id is the text between its first and second "=".
# Arguments: $1 - the mail subject
# Outputs:   the job id (empty if it cannot be parsed) on stdout
#######################################
smail_job_id() {
  local -a fields=() words=() pair=()
  local token id
  IFS=',' read -r -a fields <<< "$1"
  IFS=' ' read -r -a words <<< "${fields[0]}"
  if [[ "${words[1]}" == "Array" ]]; then
    token=${words[3]}
  else
    token=${words[1]}
  fi
  # Splitting with read keeps a literal "*" in the token from being expanded
  # against files in the current directory.
  IFS='=' read -r -a pair <<< "${token}"
  id=${pair[1]}
  # Remove the trailing "_*" until seff supports array jobs fully
  id=${id%"_*"}
  printf '%s\n' "${id}"
}

#######################################
# Send the notification, with a seff report as body for end records.
# Globals:   SEFF, MAIL, JOB_COMPLETION_TIME, ClusterName (read)
# Arguments: $1 - '-s' (ignored), $2 - subject, $3 - To: address(es),
#            space-separated if more than one
# Inputs:    stdin is handed to the mailer when no seff report is generated
# Returns:   2 when fewer than three arguments are given, otherwise the exit
#            status of the mailer
#######################################
main() {
  if (( $# < 3 )); then
    printf 'Usage: %s -s <subject> <recipient>\n' "${0##*/}" >&2
    return 2
  fi

  local subject="${ClusterName} $2"
  local -a recipients=()
  # Keep splitting the address argument on spaces, but without globbing.
  IFS=' ' read -r -a recipients <<< "$3"

  # If we decide later to seff based on specific status codes,
  # we can test against $status.
  local status jobid=
  status=$(smail_end_status "$2")
  if [[ -n "${status}" ]]; then
    jobid=$(smail_job_id "$2")
  fi

  # SEFF and MAIL are expanded unquoted on purpose so that a value carrying
  # extra options is still split into separate words.
  if [[ -n "${jobid}" ]]; then
    sleep "${JOB_COMPLETION_TIME}"
    # shellcheck disable=SC2086
    $SEFF "${jobid}" | $MAIL -s "${subject}" "${recipients[@]}"
  else
    # No ending status, or no job id could be parsed: just pass the
    # notification through.
    # shellcheck disable=SC2086
    $MAIL -s "${subject}" "${recipients[@]}"
  fi
}

main "$@"