#! /bin/bash

# Copyright 2015 Princeton University Research Computing

# Mail wrapper that appends seff output to end-of-job notifications.
#
# Input should look like this for an end record:
#    $1: '-s' (the subject argument keyword)
#    $2: The subject itself
#    $3: The To: email address.
#
# The subject should look like this for a start record:
# Slurm Job_id=323 Name=ddt_clone Began, Queued time 00:00:01
#
# The subject should look like this for an end record:
# Slurm Job_id=327 Name=ddt_clone Ended, Run time 00:05:01, COMPLETED, ExitCode 0
# Slurm Job_id=328 Name=ddt_clone Failed, Run time 00:05:01, FAILED, ExitCode 127
# Slurm Job_id=342 Name=ddt_clone Ended, Run time 00:00:33, CANCELLED, ExitCode 0
# Not sure what to do about PENDING state resulting from a requeue request.
# Doing a seff on it for now:
# Slurm Job_id=326 Name=ddt_clone Failed, Run time 00:00:41, PENDING, ExitCode 0
#
# These end records are the only types of messages to process. They have 4 (rather
# than 2) comma-delimited arguments, of which ending status is the 3rd.
# Just pass through notifications without an ending status.
#
# NOTE(review): fields are located purely by splitting on commas, so a job name
# that itself contains commas shifts the fields. The original script had the
# same limitation; it is kept here to preserve behavior.

SEFF=@bindir@/seff
MAIL=/bin/mail

#### JOB_COMPLETION_TIME ###
# The time needed for a job to complete and synchronize accounting data with
# slurmdbd. If you're running slurmctld under systemd control, executing
# `systemctl stop slurmctld` or `systemctl restart slurmctld` may hang for this
# time. While slurmctld is down, systemd will still wait for all
# descendant processes (in this case sleep executed from smail) to complete.
# The default value is 5s (half of default MessageTimeout)
JOB_COMPLETION_TIME=5

#######################################
# Print the ending status carried by a notification subject.
# The status is the 3rd comma-delimited field with all spaces removed.
# Arguments: $1 - the notification subject (a single line)
# Outputs:   the status on stdout; nothing when there is no 3rd field
#######################################
smail_end_status() {
  local -a fields=()
  # read -a splits without pathname expansion and without touching the
  # global IFS, unlike an unquoted array=($subject) assignment.
  IFS=',' read -r -a fields <<< "$1"
  printf '%s' "${fields[2]// /}"
}

#######################################
# Print the job id found in a notification subject.
# The id is the text after '=' in the second word of the first comma-delimited
# field, or in the fourth word when the second word is "Array"
# (presumably subjects such as "Slurm Array Summary Job_id=123_* (123) ..."
# - confirm against the Slurm version in use).
# Arguments: $1 - the notification subject (a single line)
# Outputs:   the job id on stdout
# Returns:   0 when an id was found, 1 otherwise
#######################################
smail_job_id() {
  local -a fields=() words=() pair=()
  local token id

  IFS=',' read -r -a fields <<< "$1"
  IFS=' ' read -r -a words <<< "${fields[0]}"

  if [[ "${words[1]}" == "Array" ]]; then
    token=${words[3]}
  else
    token=${words[1]}
  fi

  IFS='=' read -r -a pair <<< "$token"
  id=${pair[1]}

  # Remove the trailing "_*" until seff supports array jobs fully.
  # The pattern is quoted on purpose: it strips a literal "_*" suffix only.
  id=${id%"_*"}

  [[ -n "$id" ]] || return 1
  printf '%s' "$id"
}

#######################################
# Send the notification, piping seff output into the mail body for end records.
# Globals:   SEFF, MAIL, JOB_COMPLETION_TIME, SLURM_CLUSTER_NAME (all read)
# Arguments: $1 - '-s' (ignored), $2 - subject, $3 - recipient address(es)
# Returns:   2 on bad usage, otherwise the exit status of the mail command
#######################################
main() {
  if (( $# < 3 )); then
    printf 'Usage: %s -s <subject> <recipient>\n' "${0##*/}" >&2
    return 2
  fi

  local raw_subject=$2
  # Prefix the subject with the cluster name.
  local subject="${SLURM_CLUSTER_NAME} ${raw_subject}"

  # Keep the historical behavior of splitting the recipient argument on
  # spaces, but do it without pathname expansion.
  local -a recipients=()
  IFS=' ' read -r -a recipients <<< "$3"
  if (( ${#recipients[@]} == 0 )); then
    printf '%s: no recipient given\n' "${0##*/}" >&2
    return 2
  fi

  # If we decide later to seff based on specific status codes,
  # we can test against $status.
  local status jobid
  status=$(smail_end_status "$raw_subject")

  if [[ -n "$status" ]] && jobid=$(smail_job_id "$raw_subject"); then
    # Give the job time to complete and its accounting data time to sync.
    sleep "$JOB_COMPLETION_TIME"
    "$SEFF" "$jobid" | "$MAIL" -s "$subject" "${recipients[@]}"
  else
    # No ending status (or no parsable job id): pass the message through.
    "$MAIL" -s "$subject" "${recipients[@]}"
  fi
}

main "$@"