testsuite/expect/test1.29 - SchedMD/slurm - Git at Google

 #!/usr/bin/env expect
 ############################################################################
 # Purpose: Test of Slurm functionality
 #          Verify that user limits are propagated to the job.
 ############################################################################
 # Copyright (C) 2002 The Regents of the University of California.
 # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 # Written by Morris Jette <jette1@llnl.gov>
 # CODE-OCEC-09-009. All rights reserved.
 #
 # This file is part of Slurm, a resource management program.
 # For details, see <https://slurm.schedmd.com/>.
 # Please also read the included file: DISCLAIMER.
 #
 # Slurm is free software; you can redistribute it and/or modify it under
 # the terms of the GNU General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.
 #
 # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 # details.
 #
 # You should have received a copy of the GNU General Public License along
 # with Slurm; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 ############################################################################
 source ./globals

 # Scratch files for each scenario live in the test directory; the helper
 # binary is built in the current directory under the test's own name.
 set file_err         "$test_dir/error"
 set file_in          "$test_dir/input"
 set file_out         "$test_dir/output"
 set file_prog_get    "$test_name.prog"
 set job_id           0

 # Values handed to ulimit in the submitting shell. test_limit_propagation
 # expects the job to report the same values (core, fsize and stack are
 # compared after multiplying by 1024).
 set limit_core       943
 set limit_fsize      674515
 set limit_nofile     1016
 set limit_nproc      30500
 set limit_stack      5021

 # Both settings are checked, so the skip message names both of them.
 if {[get_config_param "PropagateResourceLimits"] ne "ALL" || [get_config_param "PropagateResourceLimitsExcept"] ne "(null)"} {
 	skip "This test needs PropagateResourceLimits=ALL and PropagateResourceLimitsExcept unset"
 }

 # cleanup - remove the helper binary that this script compiles.
 # Presumably invoked by the test framework when the test ends.
 proc cleanup {} {
 	file delete $::file_prog_get
 }

 #
 # test_limit_propagation - check the limit values reported by the job
 #
 # Waits for $file_out, passes it through $bin_sort and compares every
 # USER_<NAME>=<value> line with the matching limit_* global. CORE, FSIZE
 # and STACK are compared against the configured value times 1024; NOFILE
 # and NPROC are compared unchanged. A "USER_NPROC unsupported" or
 # "USER_STACK unsupported" line is counted as a match.
 #
 # Takes no arguments. Mismatches are logged with log_warn and the overall
 # outcome is reported through subtest, which expects five matches.
 #
 proc test_limit_propagation {} {
 	global file_out bin_sort number
 	global limit_core limit_fsize limit_nofile limit_nproc limit_stack

 	set matches          0

 	#
 	# Inspect the job's output file
 	#
 	wait_for_file -fail $file_out
 	spawn $bin_sort $file_out
 	expect {
 		-re "USER_CORE=($number)" {
 			# Braced expression: operands are substituted once, by expr
 			set target [expr {$limit_core * 1024}]
 			if {$expect_out(1,string) == $target} {
 				incr matches
 			} else {
 				log_warn "CORE value bad ($expect_out(1,string) != $target)"
 			}
 			exp_continue
 		}
 		-re "USER_FSIZE=($number)" {
 			set target [expr {$limit_fsize * 1024}]
 			if {$expect_out(1,string) == $target} {
 				incr matches
 			} else {
 				log_warn "FSIZE value bad ($expect_out(1,string) != $target)"
 			}
 			exp_continue
 		}
 		-re "USER_NOFILE=($number)" {
 			if {$expect_out(1,string) == $limit_nofile} {
 				incr matches
 			} else {
 				log_warn "NOFILE value bad ($expect_out(1,string) != $limit_nofile)"
 			}
 			exp_continue
 		}
 		-re "USER_NPROC=($number)" {
 			if {$expect_out(1,string) == $limit_nproc} {
 				incr matches
 			} else {
 				log_warn "NPROC value bad ($expect_out(1,string) != $limit_nproc)"
 			}
 			exp_continue
 		}
 		-re "USER_NPROC unsupported" {
 			incr matches
 			exp_continue
 		}
 		-re "USER_STACK=($number)" {
 			# Compare against the target computed once, as the other branches do
 			set target [expr {$limit_stack * 1024}]
 			if {$expect_out(1,string) == $target} {
 				incr matches
 			} else {
 				log_warn "STACK value bad ($expect_out(1,string) != $target)"
 			}
 			exp_continue
 		}
 		-re "USER_STACK unsupported" {
 			incr matches
 			exp_continue
 		}
 		timeout {
 			fail "sort not responding"
 		}
 		eof {
 			wait
 		}
 	}

 	# The condition is handed to subtest unsubstituted (presumably it is
 	# evaluated in this scope), so the names matches and target must stay.
 	set target 5
 	subtest {$matches == $target} "Rlimits should be propagated" "$matches != $target. A long running slurmd could cause a file size limit error. slurmd could have been started with limits lower than user launching the task"
 }

 #
 # We use our own program to get ulimit values since the output
 # of the ulimit program is inconsistent across systems.
 #
 # The source is read from ${file_prog_get}.c and the binary is written to
 # $file_prog_get, both relative to the current directory. The binary is
 # made accessible to its owner only and is removed again by cleanup.
 #
 exec $bin_cc -O ${file_prog_get}.c -o $file_prog_get
 exec $bin_chmod 700 $file_prog_get


 #
 # Scenario 1: a wrapper shell sets the limits and launches the helper
 # directly with srun. Every command in the wrapper ends in "|| exit 1",
 # so a rejected ulimit or a failed srun gives a non-zero exit code, which
 # is treated as a skip rather than a failure.
 #
 log_info "Testing limits propagation submitting with srun"
 make_bash_script $file_in "
   ulimit -c $limit_core || exit 1
   ulimit -f $limit_fsize || exit 1
   ulimit -n $limit_nofile || exit 1
   ulimit -u $limit_nproc || exit 1
   ulimit -s $limit_stack || exit 1
   $srun -n1 --output=$file_out --error=$file_err -t1 ./$file_prog_get || exit 1
 "
 set result [run_command $file_in]
 if {[dict get $result exit_code]} {
 	skip "Test should be able to set ulimits and submit a job: [dict get $result output]"
 }
 # Check the values the helper wrote to $file_out
 test_limit_propagation


 #
 # Scenario 2: the wrapper shell sets the limits and submits a batch job
 # whose wrapped command runs the helper through srun, i.e. as a job step.
 # The previous script and output file are removed first so that
 # test_limit_propagation only sees output from this scenario.
 #
 log_info "Testing limits propagation with sbatch and srun step"
 file delete $file_in $file_out
 make_bash_script $file_in "
   ulimit -c $limit_core || exit 1
   ulimit -f $limit_fsize || exit 1
   ulimit -n $limit_nofile || exit 1
   ulimit -u $limit_nproc || exit 1
   ulimit -s $limit_stack || exit 1
   $sbatch -n1 --output=$file_out --error=$file_err -t1 --wrap '$srun ./$file_prog_get' || exit 1
 "
 set result [run_command $file_in]
 set output [dict get $result output]
 if {[dict get $result exit_code]} {
 	skip "Test should be able to set ulimits and submit a job: $output"
 }
 # Pull the job id out of the submission message captured from the wrapper
 if {![regexp {Submitted \S+ job (\d+)} $output - job_id]} {
 	fail "Unable to submit batch script"
 }
 # The output file is only inspected once the job has reached DONE
 wait_for_job -fail $job_id "DONE"
 test_limit_propagation


 #
 # Scenario 3: same as the previous scenario, except that the wrapped
 # command runs the helper directly (no srun), so the values are reported
 # from the batch step itself.
 #
 log_info "Testing limits propagation to batch step"
 file delete $file_in $file_out
 make_bash_script $file_in "
   ulimit -c $limit_core || exit 1
   ulimit -f $limit_fsize || exit 1
   ulimit -n $limit_nofile || exit 1
   ulimit -u $limit_nproc || exit 1
   ulimit -s $limit_stack || exit 1
   $sbatch -n1 --output=$file_out --error=$file_err -t1 --wrap './$file_prog_get' || exit 1
 "
 set result [run_command $file_in]
 set output [dict get $result output]
 if {[dict get $result exit_code]} {
 	skip "Test should be able to set ulimits and submit a job: $output"
 }
 # Pull the job id out of the submission message captured from the wrapper
 if {![regexp {Submitted \S+ job (\d+)} $output - job_id]} {
 	fail "Unable to submit batch script"
 }
 # The output file is only inspected once the job has reached DONE
 wait_for_job -fail $job_id "DONE"
 test_limit_propagation


 #
 # Scenario 4: here the script that sets the limits is itself submitted as
 # the job (through submit_job) instead of being run locally, and it then
 # starts the helper with srun. This checks that limits set inside the
 # batch script reach the steps it launches.
 #
 log_info "Testing limits propagation from batch script to steps"
 file delete $file_in $file_out
 make_bash_script $file_in "
   ulimit -c $limit_core || exit 1
   ulimit -f $limit_fsize || exit 1
   ulimit -n $limit_nofile || exit 1
   ulimit -u $limit_nproc || exit 1
   ulimit -s $limit_stack || exit 1
   $srun ./$file_prog_get || exit 1
 "
 # The value returned by submit_job is used as the job id to wait on
 set job_id [submit_job -fail "-n1 --output=$file_out --error=$file_err -t1 $file_in"]

 wait_for_job -fail $job_id "DONE"
 test_limit_propagation
	#!/usr/bin/env expect
	############################################################################
	# Purpose: Test of Slurm functionality
	# Verify that user limits are propagated to the job.
	############################################################################
	# Copyright (C) 2002 The Regents of the University of California.
	# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
	# Written by Morris Jette <jette1@llnl.gov>
	# CODE-OCEC-09-009. All rights reserved.
	#
	# This file is part of Slurm, a resource management program.
	# For details, see <https://slurm.schedmd.com/>.
	# Please also read the included file: DISCLAIMER.
	#
	# Slurm is free software; you can redistribute it and/or modify it under
	# the terms of the GNU General Public License as published by the Free
	# Software Foundation; either version 2 of the License, or (at your option)
	# any later version.
	#
	# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
	# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
	# details.
	#
	# You should have received a copy of the GNU General Public License along
	# with Slurm; if not, write to the Free Software Foundation, Inc.,
	# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
	############################################################################
	source ./globals

	# Scratch files for each scenario live in the test directory; the helper
	# binary is built in the current directory under the test's own name.
	set file_err         "$test_dir/error"
	set file_in          "$test_dir/input"
	set file_out         "$test_dir/output"
	set file_prog_get    "$test_name.prog"
	set job_id           0

	# Values handed to ulimit in the submitting shell. test_limit_propagation
	# expects the job to report the same values (core, fsize and stack are
	# compared after multiplying by 1024).
	set limit_core       943
	set limit_fsize      674515
	set limit_nofile     1016
	set limit_nproc      30500
	set limit_stack      5021

	# The logical OR must be written as a plain || here: a backslash-escaped
	# form is not valid expression syntax. Both settings are checked, so the
	# skip message names both of them.
	if {[get_config_param "PropagateResourceLimits"] ne "ALL" || [get_config_param "PropagateResourceLimitsExcept"] ne "(null)"} {
		skip "This test needs PropagateResourceLimits=ALL and PropagateResourceLimitsExcept unset"
	}

	# cleanup - remove the helper binary that this script compiles.
	# Presumably invoked by the test framework when the test ends.
	proc cleanup {} {
		file delete $::file_prog_get
	}

	#
	# test_limit_propagation - check the limit values reported by the job
	#
	# Waits for $file_out, passes it through $bin_sort and compares every
	# USER_<NAME>=<value> line with the matching limit_* global. CORE, FSIZE
	# and STACK are compared against the configured value times 1024; NOFILE
	# and NPROC are compared unchanged. A "USER_NPROC unsupported" or
	# "USER_STACK unsupported" line is counted as a match.
	#
	# Takes no arguments. Mismatches are logged with log_warn and the overall
	# outcome is reported through subtest, which expects five matches.
	#
	proc test_limit_propagation {} {
		global file_out bin_sort number
		global limit_core limit_fsize limit_nofile limit_nproc limit_stack

		set matches 0

		#
		# Inspect the job's output file
		#
		wait_for_file -fail $file_out
		spawn $bin_sort $file_out
		expect {
			-re "USER_CORE=($number)" {
				# Braced expression: operands are substituted once, by expr
				set target [expr {$limit_core * 1024}]
				if {$expect_out(1,string) == $target} {
					incr matches
				} else {
					log_warn "CORE value bad ($expect_out(1,string) != $target)"
				}
				exp_continue
			}
			-re "USER_FSIZE=($number)" {
				set target [expr {$limit_fsize * 1024}]
				if {$expect_out(1,string) == $target} {
					incr matches
				} else {
					log_warn "FSIZE value bad ($expect_out(1,string) != $target)"
				}
				exp_continue
			}
			-re "USER_NOFILE=($number)" {
				if {$expect_out(1,string) == $limit_nofile} {
					incr matches
				} else {
					log_warn "NOFILE value bad ($expect_out(1,string) != $limit_nofile)"
				}
				exp_continue
			}
			-re "USER_NPROC=($number)" {
				if {$expect_out(1,string) == $limit_nproc} {
					incr matches
				} else {
					log_warn "NPROC value bad ($expect_out(1,string) != $limit_nproc)"
				}
				exp_continue
			}
			-re "USER_NPROC unsupported" {
				incr matches
				exp_continue
			}
			-re "USER_STACK=($number)" {
				# Compare against the target computed once, as the other branches do
				set target [expr {$limit_stack * 1024}]
				if {$expect_out(1,string) == $target} {
					incr matches
				} else {
					log_warn "STACK value bad ($expect_out(1,string) != $target)"
				}
				exp_continue
			}
			-re "USER_STACK unsupported" {
				incr matches
				exp_continue
			}
			timeout {
				fail "sort not responding"
			}
			eof {
				wait
			}
		}

		# The condition is handed to subtest unsubstituted (presumably it is
		# evaluated in this scope), so the names matches and target must stay.
		set target 5
		subtest {$matches == $target} "Rlimits should be propagated" "$matches != $target. A long running slurmd could cause a file size limit error. slurmd could have been started with limits lower than user launching the task"
	}

	#
	# We use our own program to get ulimit values since the output
	# of the ulimit program is inconsistent across systems.
	#
	# The source is read from ${file_prog_get}.c and the binary is written to
	# $file_prog_get, both relative to the current directory. The binary is
	# made accessible to its owner only and is removed again by cleanup.
	#
	exec $bin_cc -O ${file_prog_get}.c -o $file_prog_get
	exec $bin_chmod 700 $file_prog_get


	#
	# Scenario 1: a wrapper shell sets the limits and launches the helper
	# directly with srun. Every command in the wrapper ends in "|| exit 1",
	# so a rejected ulimit or a failed srun gives a non-zero exit code, which
	# is treated as a skip rather than a failure.
	#
	log_info "Testing limits propagation submitting with srun"
	make_bash_script $file_in "
	ulimit -c $limit_core || exit 1
	ulimit -f $limit_fsize || exit 1
	ulimit -n $limit_nofile || exit 1
	ulimit -u $limit_nproc || exit 1
	ulimit -s $limit_stack || exit 1
	$srun -n1 --output=$file_out --error=$file_err -t1 ./$file_prog_get || exit 1
	"
	set result [run_command $file_in]
	if {[dict get $result exit_code]} {
		skip "Test should be able to set ulimits and submit a job: [dict get $result output]"
	}
	# Check the values the helper wrote to $file_out
	test_limit_propagation


	#
	# Scenario 2: the wrapper shell sets the limits and submits a batch job
	# whose wrapped command runs the helper through srun, i.e. as a job step.
	# The previous script and output file are removed first so that
	# test_limit_propagation only sees output from this scenario.
	#
	log_info "Testing limits propagation with sbatch and srun step"
	file delete $file_in $file_out
	make_bash_script $file_in "
	ulimit -c $limit_core || exit 1
	ulimit -f $limit_fsize || exit 1
	ulimit -n $limit_nofile || exit 1
	ulimit -u $limit_nproc || exit 1
	ulimit -s $limit_stack || exit 1
	$sbatch -n1 --output=$file_out --error=$file_err -t1 --wrap '$srun ./$file_prog_get' || exit 1
	"
	set result [run_command $file_in]
	set output [dict get $result output]
	if {[dict get $result exit_code]} {
		skip "Test should be able to set ulimits and submit a job: $output"
	}
	# Pull the job id out of the submission message captured from the wrapper
	if {![regexp {Submitted \S+ job (\d+)} $output - job_id]} {
		fail "Unable to submit batch script"
	}
	# The output file is only inspected once the job has reached DONE
	wait_for_job -fail $job_id "DONE"
	test_limit_propagation


	#
	# Scenario 3: same as the previous scenario, except that the wrapped
	# command runs the helper directly (no srun), so the values are reported
	# from the batch step itself.
	#
	log_info "Testing limits propagation to batch step"
	file delete $file_in $file_out
	make_bash_script $file_in "
	ulimit -c $limit_core || exit 1
	ulimit -f $limit_fsize || exit 1
	ulimit -n $limit_nofile || exit 1
	ulimit -u $limit_nproc || exit 1
	ulimit -s $limit_stack || exit 1
	$sbatch -n1 --output=$file_out --error=$file_err -t1 --wrap './$file_prog_get' || exit 1
	"
	set result [run_command $file_in]
	set output [dict get $result output]
	if {[dict get $result exit_code]} {
		skip "Test should be able to set ulimits and submit a job: $output"
	}
	# Pull the job id out of the submission message captured from the wrapper
	if {![regexp {Submitted \S+ job (\d+)} $output - job_id]} {
		fail "Unable to submit batch script"
	}
	# The output file is only inspected once the job has reached DONE
	wait_for_job -fail $job_id "DONE"
	test_limit_propagation


	#
	# Scenario 4: here the script that sets the limits is itself submitted as
	# the job (through submit_job) instead of being run locally, and it then
	# starts the helper with srun. This checks that limits set inside the
	# batch script reach the steps it launches.
	#
	log_info "Testing limits propagation from batch script to steps"
	file delete $file_in $file_out
	make_bash_script $file_in "
	ulimit -c $limit_core || exit 1
	ulimit -f $limit_fsize || exit 1
	ulimit -n $limit_nofile || exit 1
	ulimit -u $limit_nproc || exit 1
	ulimit -s $limit_stack || exit 1
	$srun ./$file_prog_get || exit 1
	"
	# The value returned by submit_job is used as the job id to wait on
	set job_id [submit_job -fail "-n1 --output=$file_out --error=$file_err -t1 $file_in"]

	wait_for_job -fail $job_id "DONE"
	test_limit_propagation