| #!/bin/bash |
| # |
| # Generic release agent for SLURM cgroup usage |
| # |
# Manage a cgroup hierarchy like:
| # |
| # /cgroup/subsystem/uid_%/job_%/step_%/task_% |
| # |
# Automatically sync the uid_% cgroups so that they stay coherent
# with the remaining job children when one of them is removed
# by a call to this release agent.
# The synchronization is performed under a flock on the root cgroup
# to ensure coherency of the cgroup contents.
| # |
| |
# the managed subsystem is taken from the program name suffix
# (everything after the last "_", e.g. a name ending in "_cpuset"
# selects the cpuset subsystem)
progname=$(basename "$0")
subsystem=${progname##*_}
| |
| get_mount_dir() |
| { |
| local lssubsys=$(type -p lssubsys) |
| if [[ $lssubsys ]]; then |
| $lssubsys -m $subsystem | awk '{print $2}' |
| else |
| echo "/cgroup/$subsystem" |
| fi |
| } |
| |
| mountdir=$(get_mount_dir) |
| |
| if [[ $# -eq 0 ]] |
| then |
echo "Usage: ${progname} [sync] cgroup"
| exit 1 |
| fi |
| |
| # build orphan cg path |
| if [[ $# -eq 1 ]] |
| then |
| rmcg=${mountdir}$1 |
| else |
| rmcg=${mountdir}$2 |
| fi |
| slurmcg=${rmcg%/uid_*} |
| if [[ ${slurmcg} == ${rmcg} ]] |
| then |
# not a slurm job pattern, perhaps the slurm root cgroup itself:
# just remove the dir under a lock and exit
| flock -x ${mountdir} -c "rmdir ${rmcg}" |
| exit $? |
| fi |
| orphancg=${slurmcg}/orphan |
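
# worked example (illustrative path): for an input cgroup
# "/slurm/uid_1000/job_12/step_0" we get
#   rmcg     = ${mountdir}/slurm/uid_1000/job_12/step_0
#   slurmcg  = ${mountdir}/slurm        (the "/uid_..." suffix is stripped)
#   orphancg = ${mountdir}/slurm/orphan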
| |
# make sure the orphan cgroup exists
| if [[ ! -d ${orphancg} ]] |
| then |
| mkdir ${orphancg} |
| case ${subsystem} in |
| cpuset) |
| cat ${mountdir}/cpuset.cpus > ${orphancg}/cpuset.cpus |
| cat ${mountdir}/cpuset.mems > ${orphancg}/cpuset.mems |
| ;; |
| *) |
| ;; |
| esac |
| fi |
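
# note: the cpus/mems seeding above is needed because a freshly created
# cpuset cgroup starts with empty cpuset.cpus and cpuset.mems, and no task
# can be attached to it until both are populated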
| |
| # kernel call |
| if [[ $# -eq 1 ]] |
| then |
| |
rmcg=${mountdir}$1
| |
# try to extract the uid cgroup from the input one
# (extract .../uid_% from .../uid_%/job_*...)
| uidcg=${rmcg%/job_*} |
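# e.g. (illustrative): rmcg  = ${mountdir}/slurm/uid_1000/job_12/step_0
#                      uidcg = ${mountdir}/slurm/uid_1000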
| if [[ ${uidcg} == ${rmcg} ]] |
| then |
# not a slurm job pattern, perhaps the uid cgroup itself:
# just remove the dir under a lock and exit
| flock -x ${mountdir} -c "rmdir ${rmcg}" |
| exit $? |
| fi |
| |
| if [[ -d ${mountdir} ]] |
| then |
| flock -x ${mountdir} -c "$0 sync $@" |
| fi |
| |
| exit $? |
| |
# sync subcall (executed under flock by the kernel-hook branch above, to be
# sure that no one else is manipulating the hierarchy, i.e. PAM, SLURM, ...)
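# a sync subcall issued by the branch above looks like (illustrative paths):
#   flock -x /sys/fs/cgroup/cpuset -c "<this script> sync /slurm/uid_1000/job_12"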
| elif [[ $# -eq 2 ]] && [[ $1 == "sync" ]] |
| then |
| |
| shift |
rmcg=${mountdir}$1
| uidcg=${rmcg%/job_*} |
| |
| # remove this cgroup |
| if [[ -d ${rmcg} ]] |
| then |
| case ${subsystem} in |
| memory) |
# give the memory cgroup some time to finish its lazy
# cleanup before the rmdir; still not a perfect workaround
| sleep 1 |
| ;; |
| *) |
| ;; |
| esac |
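# note: memcg charges are reclaimed lazily, so the rmdir below may still
# fail if some pages have not been uncharged yet; on cgroup v1 a reclaim
# could be forced first (an assumption, not part of the original logic,
# hence left disabled):
#   [[ ${subsystem} == memory ]] && echo 0 > ${rmcg}/memory.force_empty 2>/dev/null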
| rmdir ${rmcg} |
| fi |
| if [[ ${uidcg} == ${rmcg} ]] |
| then |
# not a slurm job pattern; exit now, do not sync
| exit 0 |
| fi |
| |
# sync the user cgroup based on the targeted subsystem
# and the remaining jobs
| if [[ -d ${uidcg} ]] |
| then |
| case ${subsystem} in |
| cpuset) |
| cpus=$(cat ${uidcg}/job_*/cpuset.cpus 2>/dev/null) |
| if [[ -n ${cpus} ]] |
| then |
# rebuild the uid cgroup's cpuset.cpus from the cpus still
# used by the remaining jobs of this user
cpus=$(scontrol show hostnames $(echo ${cpus} | tr ' ' ','))
cpus=$(echo ${cpus} | tr ' ' ',')
echo ${cpus} > ${uidcg}/cpuset.cpus
| else |
| # first move the remaining processes to |
| # a cgroup reserved for orphaned processes |
| for t in $(cat ${uidcg}/tasks) |
| do |
| echo $t > ${orphancg}/tasks |
| done |
| # then remove the remaining cpus from the cgroup |
| echo "" > ${uidcg}/cpuset.cpus |
| fi |
| ;; |
| *) |
| ;; |
| esac |
| fi |
| |
| # error |
| else |
echo "Usage: ${progname} [sync] cgroup"
| exit 1 |
| fi |
| |
| exit 0 |