testsuite/expect/test37.17 - SchedMD/slurm - Git at Google

 #!/usr/bin/env expect
 ############################################################################
 # Purpose: Test local and remote job dependencies
 #
 # Reqs:    1. Using slurmdbd accounting storage type and is up
 #          2. fed_slurm_base is defined in globals.local - set to the directory
 #          that holds each federation cluster's install tree (fedc1, fedc2, fedc3).
 #          Eg.
 #          fedr/slurm/ (src)
 #          fedr/fed1/bin
 #          fedr/fed1/sbin
 #          fedr/fed1/etc
 #          fedr/fed1/...
 #          fedr/fed2/...
 #          fedr/fed3/...
 #          3. controllers are up and running.
 ############################################################################
 # Copyright (C) SchedMD LLC.
 #
 # This file is part of Slurm, a resource management program.
 # For details, see <https://slurm.schedmd.com/>.
 # Please also read the included file: DISCLAIMER.
 #
 # Slurm is free software; you can redistribute it and/or modify it under
 # the terms of the GNU General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.
 #
 # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 # details.
 #
 # You should have received a copy of the GNU General Public License along
 # with Slurm; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 ############################################################################

 source ./globals
 source ./globals_accounting
 source ./globals_federation

 # Short aliases for the three federation cluster names.
 lassign [list $fedc1 $fedc2 $fedc3] c1 c2 c3
 set all_clusters "$c1,$c2,$c3"
 set fed_name "fed_$test_name"

 # Client commands taken from the first cluster's directory tree.
 set my_scancel "${fed_slurm_base}/$c1/bin/scancel"
 set my_scontrol "${fed_slurm_base}/$c1/bin/scontrol"

 # Batch script paths handed to submit_job by the tests.
 set file_in_long "$test_dir/long.in"
 set file_in_short "$test_dir/short.in"

 set user_name [get_my_user_name]

 # Overwritten by get_job_dependency on every call.
 set reason ""
 set dependency ""

 set job_id1 0
 set job_id2 0

 ###############################################################################
 # Functions
 ###############################################################################

 # Cancel every job submitted by this test on all federation clusters.
 #
 # Jobs are selected by job name. submit_job names its jobs
 # "${test_name}_job", so the same expression is used here to guarantee that
 # the two can never drift apart.
 # Waits for scancel to exit (eof), then sleeps 5 seconds.
 proc cancel_federation_jobs { } {
 	global my_scancel all_clusters test_name

 	spawn $my_scancel -M$all_clusters --jobname ${test_name}_job
 	expect {
 		eof {
 			wait
 		}
 	}
 	sleep 5
 }

 # Cancel a single job and block until it is reported as done.
 #
 # Arguments:
 #	job_id   - id handed to scancel
 #	clusters - cluster list passed to wait_for_fed_job for the "DONE" wait
 proc cancel_job { job_id clusters } {
 	global my_scancel

 	spawn $my_scancel $job_id
 	# Reap scancel once it closes its output.
 	expect eof { wait }
 	wait_for_fed_job $job_id "DONE" $clusters
 }

 # Submit a batch script and return the resulting job id.
 #
 # Arguments:
 #	options - extra sbatch options (split into words when the command is run)
 #	cdir    - cluster directory under fed_slurm_base whose sbatch is used
 #	file_in - batch script to submit
 #
 # The job is always named "${test_name}_job", gets "-t1" and writes its
 # output to /dev/null. Calls fail when sbatch times out or no job id is seen.
 proc submit_job { options cdir file_in } {
 	global bin_sleep sbatch number fed_slurm_base test_name

 	# Use the cluster-specific sbatch only when a federation is set up;
 	# otherwise fall back to the default sbatch.
 	if {[check_federation_setup]} {
 		set sbatch_path "${fed_slurm_base}/$cdir/bin/sbatch"
 	} else {
 		set sbatch_path $sbatch
 	}

 	set submitted_id 0
 	set command "$sbatch_path --job-name=${test_name}_job -t1 \
 		$options --output=/dev/null $file_in"
 	spawn {*}$command
 	expect {
 		-re "Submitted batch job ($number)" {
 			set submitted_id $expect_out(1,string)
 			exp_continue
 		}
 		timeout {
 			fail "sbatch not responding"
 		}
 		eof {
 			wait
 		}
 	}

 	if { $submitted_id == 0 } {
 		fail "Failed to submit job"
 	}
 	return $submitted_id
 }

 # Query "scontrol show job" for a job's Reason and Dependency fields.
 #
 # Arguments:
 #	job_id - job (or array task) id passed to "scontrol show job"
 #
 # Side effects: resets the globals "reason" and "dependency" to "" and then
 # sets them from every "Reason=... Dependency=..." match in the output (the
 # last match wins); both stay "" when nothing matches. Calls fail if
 # scontrol times out.
 #
 # Returns: the dependency string (also left in the global "dependency").
 proc get_job_dependency { job_id } {
 	global my_scontrol reason dependency re_word_str

 	set reason ""
 	set dependency ""
 	set reason_match "\[a-zA-Z_\]+"
 	# Possible dependency syntax:
 	# <type>:<jobid>+<time>(state)
 	# <type>:<jobid>(state)
 	# singleton(unfulfilled)
 	# Notes:
 	# * Multiple dependencies are separated by a comma or a question mark
 	# * The state for singleton will only ever be "unfulfilled"
 	# * The state is either failed or unfulfilled. Fulfilled dependencies
 	#   are cleared from the list
 	# * When there are no dependencies, it will be this string: "(null)"

 	# This regex takes care of handling dependencies separated by comma or
 	# question mark.
 	# ([a-zA-Z_]+:[0-9_*+]+\([a-zA-Z]+\)\?*\,*|singleton\(unfulfilled\)\?*\,*)+|\(null\)


 #set depend_match "\[a-zA-Z_\]+:\[0-9_*+\]+\\($re_word_str\\)\|\\(null\\)\|singleton\\(unfulfilled\\)"

 	# Building blocks of the dependency regex.
 	set type "\[a-zA-Z_\]+"
 	set jobid_time "\[0-9_*+\]+"
 	# NOTE: this value ends with a lone backslash. When $delim is appended
 	# below, that backslash escapes the leading "," of $delim ("\,*").
 	set state "\\($re_word_str\\)\\"
 	set delim ",*\\?*"
 	set depend_regex "\($type:$jobid_time$state$delim\|singleton\\(unfulfilled\\)$delim\)+"
 	set no_depend "\\(null\\)"
 	set depend_match "$depend_regex\|$no_depend"

 	# Do not echo the scontrol output while it is being parsed.
 	log_user 0
 	spawn $my_scontrol show job $job_id
 	expect {
 		-re "Reason=($reason_match) Dependency=($depend_match)" {
 			set reason $expect_out(1,string)
 			set dependency $expect_out(2,string)
 			exp_continue
 		}
 		timeout {
 			fail "scontrol not responding"
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1
 	log_info "job $job_id; actual   reason: \"$reason\"; dependency: \"$dependency\""
 	return $dependency
 }

 # Refresh the globals "reason" and "dependency" for a job and compare them
 # with the expected values.
 #
 # Returns 0 when both match exactly, 1 otherwise.
 proc check_depend { job_id expected_reason expected_dependency } {
 	global reason dependency

 	get_job_dependency $job_id
 	set reason_ok [string equal $reason $expected_reason]
 	set depend_ok [string equal $dependency $expected_dependency]
 	return [expr {!($reason_ok && $depend_ok)}]
 }

 # Poll a job until its reason and dependency string equal the expected ones.
 #
 # Arguments:
 #	job_id              - job (or array task) id to poll
 #	expected_reason     - reason to wait for
 #	expected_dependency - dependency string to wait for
 #
 # Returns 0 as soon as check_depend reports a match. Calls fail when the job
 # reports "DependencyNeverSatisfied" although that was not the expected
 # reason, or when max_delay seconds of polling have elapsed.
 proc wait_for_depend { job_id expected_reason expected_dependency } {
 	global reason dependency

 	set error 0
 	set my_delay 0
 	# By default we test remote dependencies every 30 seconds.
 	# Once the dependencies are cleared, we have to wait for the job to be
 	# scheduled which could be another 30-60 seconds depending on the
 	# main scheduler and backfill scheduler timing.
 	set max_delay 120
 	set poll_interval 3

 	set want_never_satisfied \
 		[expr {$expected_reason eq "DependencyNeverSatisfied"}]

 	log_info "job $job_id; expected reason: \"$expected_reason\"; dependency: \"$expected_dependency\""

 	while 1 {
 		if {![check_depend $job_id $expected_reason $expected_dependency]} {
 			return 0
 		}

 		# A dependency that failed unexpectedly is reported right away
 		# instead of polling until the timeout.
 		if {(!$want_never_satisfied) && $reason eq "DependencyNeverSatisfied"} {
 			log_error "Job dependency failed, but it shouldn't have."
 			set error 1
 		}
 		if {$my_delay >= $max_delay} {
 			log_info "delay $my_delay max $max_delay"
 			log_error "Timeout waiting for dependency to change."
 			set error 1
 		}

 		if {$error} {
 			# Report the expected dependency value itself (the "$" was
 			# missing, so the variable name was printed literally).
 			fail "Job ($job_id) actual (reason=\"$reason\"; dependency=\"$dependency\") != expected (reason=\"$expected_reason\"; dependency=\"$expected_dependency\")"
 		}

 		exec sleep $poll_interval
 		incr my_delay $poll_interval
 	}
 }

 # Wait for a job to reach a state and fail unless wait_for_fed_job reports
 # exactly the requested cluster.
 proc my_wait_for_fed_job { job_id state cluster } {
 	set found_on [wait_for_fed_job $job_id $state $cluster]
 	if {![string equal $cluster $found_on]} {
 		fail "Job ($job_id) did not reach state $state on cluster ($cluster)"
 	}
 }

 # Report whether a job is known locally to a given cluster.
 #
 # Runs "scontrol -M<cluster> --local -o show job <job_id>" and inspects the
 # output.
 #
 # Returns 1 when "JobId=<job_id>" is seen and 0 when "Invalid job id
 # specified" is seen. Calls fail when scontrol times out or exits without
 # printing either text, so callers always receive a boolean.
 proc is_job_on_cluster { job_id cluster } {
 	global my_scontrol

 	# -1 = undetermined, 0 = not found, 1 = found
 	set found -1
 	set timed_out 0

 	log_user 0
 	spawn $my_scontrol -M$cluster --local -o show job $job_id
 	expect {
 		-re "JobId=$job_id" {
 			set found 1
 			exp_continue
 		}
 		-re "Invalid job id specified" {
 			set found 0
 			exp_continue
 		}
 		timeout {
 			set timed_out 1
 		}
 		eof {
 			# Always reap scontrol, including after a match.
 			wait
 		}
 	}
 	# Restore logging on every path, including plain eof.
 	log_user 1

 	if {$found == 1} {
 		log_info "Found job $job_id in cluster $cluster"
 		return 1
 	}
 	if {$found == 0} {
 		log_info "Did not find job $job_id in cluster $cluster"
 		return 0
 	}

 	if {$timed_out} {
 		fail "scontrol not responding"
 	} else {
 		fail "scontrol gave no recognizable answer for job ($job_id) on cluster ($cluster)"
 	}
 	# Only reached if fail returns; keep the result usable as a boolean.
 	return 0
 }

 # "after" dependency with both jobs on cluster c1: the dependent job must
 # stay dependent until the first job is running, then run itself.
 proc test_local_after { } {
 	global c1 file_in_long bin_sleep

 	log_info "#############################################################"
 	log_info "# Test local after"
 	log_info "#############################################################"

 	# Local dependency succeeds
 	log_info "after: test that local dependency succeeds:"
 	set first [submit_job "-M$c1 --begin=now+5" $c1 $file_in_long]
 	set dependent [submit_job "--depend=after:$first -M$c1" $c1 $file_in_long]

 	wait_for_depend $dependent "Dependency" "after:${first}(unfulfilled)"
 	my_wait_for_fed_job $first "RUNNING" $c1
 	wait_for_depend $dependent "None" "(null)"
 	my_wait_for_fed_job $dependent "RUNNING" $c1

 	cancel_job $first $c1
 	cancel_job $dependent $c1
 }

 # "after" dependency where the first job is on c2 and the dependent job on
 # c1, followed by an "after:<job>+<time>" check with both jobs on c1.
 proc test_remote_after { } {
 	global c1 c2 file_in_long bin_sleep

 	log_info "#############################################################"
 	log_info "# Test remote after"
 	log_info "#############################################################"

 	# Remote dependency succeeds
 	log_info "after: test that remote dependency succeeds:"
 	set remote_job [submit_job "-M$c2 --begin=now+5" $c2 $file_in_long]
 	set dependent [submit_job "--depend=after:$remote_job -M$c1" $c1 $file_in_long]

 	wait_for_depend $dependent "Dependency" "after:${remote_job}(unfulfilled)"
 	my_wait_for_fed_job $remote_job "RUNNING" $c2
 	wait_for_depend $dependent "None" "(null)"
 	my_wait_for_fed_job $dependent "RUNNING" $c1
 	cancel_job $remote_job $c2
 	cancel_job $dependent $c1

 	# Test after with a time attached. file_in_long sleeps for 60 seconds.
 	log_info "after: test that a after+time works:"
 	set first [submit_job "-M$c1" $c1 $file_in_long]
 	set dependent [submit_job "--depend=after:$first+1 -M$c1" $c1 $file_in_long]
 	set still_waiting "after:${first}+1(unfulfilled)"

 	my_wait_for_fed_job $first "RUNNING" $c1
 	my_wait_for_fed_job $dependent "PENDING" $c1
 	wait_for_depend $dependent "Dependency" $still_waiting

 	log_info "Check that job $dependent is still dependent after 45 seconds"
 	exec $bin_sleep 45
 	my_wait_for_fed_job $dependent "PENDING" $c1
 	wait_for_depend $dependent "Dependency" $still_waiting

 	log_info "Wait for job $dependent dependency to be fulfilled"
 	wait_for_depend $dependent "None" "(null)"
 	my_wait_for_fed_job $dependent "RUNNING" $c1
 	cancel_job $dependent $c1

 	# After dependency never fails.
 }

 # "afterany" dependency with both jobs on c1: cancelling the first job must
 # clear the dependency and let the dependent job run.
 proc test_local_afterany { } {
 	global c1 file_in_long

 	log_info "#############################################################"
 	log_info "# Test local afterany"
 	log_info "#############################################################"

 	# Local dependency succeeds
 	log_info "afterany: test that local dependency succeeds:"
 	set parent [submit_job "-M$c1" $c1 $file_in_long]
 	set child [submit_job "--depend=afterany:$parent -M$c1" $c1 $file_in_long]

 	wait_for_depend $child "Dependency" "afterany:${parent}(unfulfilled)"
 	my_wait_for_fed_job $parent "RUNNING" $c1
 	cancel_job $parent $c1

 	wait_for_depend $child "None" "(null)"
 	my_wait_for_fed_job $child "RUNNING" $c1
 	cancel_job $child $c1
 }

 # "afterany" dependency on a job running on c2, plus a check that the old
 # "--depend=jobid[,jobid]" syntax is reported as afterany dependencies.
 proc test_remote_afterany { } {
 	global c1 c2 file_in_long

 	log_info "#############################################################"
 	log_info "# Test remote afterany"
 	log_info "#############################################################"

 	# Remote dependency succeeds
 	log_info "afterany: test that remote dependency succeeds:"
 	set parent [submit_job "-M$c2" $c2 $file_in_long]
 	set child [submit_job "--depend=afterany:$parent -M$c1" $c1 $file_in_long]

 	wait_for_depend $child "Dependency" "afterany:${parent}(unfulfilled)"
 	my_wait_for_fed_job $parent "RUNNING" $c2
 	cancel_job $parent $c2
 	wait_for_depend $child "None" "(null)"
 	my_wait_for_fed_job $child "RUNNING" $c1
 	cancel_job $child $c1

 	# Test old syntax: --depend=jobid,jobid
 	log_info "afterany: test old syntax: --depend=jobid\[,jobid,jobid...\]"
 	set first [submit_job "-M$c1" $c1 $file_in_long]
 	set second [submit_job "--depend=$first -M$c1" $c1 $file_in_long]
 	set third [submit_job "--depend=$first,$second -M$c1" $c1 $file_in_long]

 	set on_first "afterany:${first}(unfulfilled)"
 	set on_second "afterany:${second}(unfulfilled)"

 	my_wait_for_fed_job $first "RUNNING" $c1
 	my_wait_for_fed_job $second "PENDING" $c1
 	my_wait_for_fed_job $third "PENDING" $c1
 	wait_for_depend $second "Dependency" $on_first
 	wait_for_depend $third "Dependency" "$on_first,$on_second"
 	cancel_federation_jobs
 }

 # "aftercorr" dependency between two 2-task job arrays on c1: first the case
 # where the dependency is fulfilled, then the case where cancelling the
 # first array's tasks makes it fail.
 proc test_local_aftercorr { } {
 	global c1 kill_invalid_depend file_in_long file_in_short

 	log_info "#############################################################"
 	log_info "# Test local aftercorr"
 	log_info "#############################################################"

 	set tasks {1 2}

 	# Local dependency succeeds
 	log_info "aftercorr: test that local dependency succeeds:"
 	set parent [submit_job "-M$c1 --array=1-2" $c1 $file_in_short]
 	set child [submit_job "--depend=aftercorr:$parent -M$c1 \
 		--array=1-2" $c1 $file_in_long]
 	set unfulfilled "aftercorr:${parent}_*(unfulfilled)"

 	foreach t $tasks {
 		my_wait_for_fed_job "${parent}_$t" "RUNNING" $c1
 	}
 	foreach t $tasks {
 		my_wait_for_fed_job "${child}_$t" "PENDING" $c1
 	}
 	foreach t $tasks {
 		wait_for_depend "${child}_$t" "Dependency" $unfulfilled
 	}

 	foreach t $tasks {
 		my_wait_for_fed_job "${parent}_$t" "DONE" $c1
 	}
 	foreach t $tasks {
 		wait_for_depend "${child}_$t" "None" "(null)"
 	}
 	foreach t $tasks {
 		my_wait_for_fed_job "${child}_$t" "RUNNING" $c1
 	}
 	cancel_job $child $c1

 	# Local dependency fails
 	log_info "aftercorr: test that local dependency fails:"
 	set parent [submit_job "-M$c1 --array=1-2" $c1 $file_in_long]
 	set child [submit_job "--depend=aftercorr:$parent -M$c1 \
 		--array=1-2" $c1 $file_in_long]
 	set unfulfilled "aftercorr:${parent}_*(unfulfilled)"
 	set failed "aftercorr:${parent}_*(failed)"

 	foreach t $tasks {
 		my_wait_for_fed_job "${parent}_$t" "RUNNING" $c1
 	}
 	foreach t $tasks {
 		my_wait_for_fed_job "${child}_$t" "PENDING" $c1
 	}
 	foreach t $tasks {
 		wait_for_depend "${child}_$t" "Dependency" $unfulfilled
 	}

 	# Cancel each parent task and check the matching child task in turn.
 	foreach t $tasks {
 		cancel_job "${parent}_$t" $c1
 		wait_for_depend "${child}_$t" "DependencyNeverSatisfied" $failed
 	}
 	if { !$kill_invalid_depend } {
 		cancel_job "$child" $c1
 	}
 }

 # "aftercorr" dependency of a 2-task array on c1 on a 2-task array on c2:
 # first the fulfilled case, then the failed case.
 proc test_remote_aftercorr { } {
 	global c1 c2 kill_invalid_depend file_in_long file_in_short

 	log_info "#############################################################"
 	log_info "# Test remote aftercorr"
 	log_info "#############################################################"

 	set tasks {1 2}

 	# Remote dependency succeeds
 	log_info "aftercorr: test that remote dependency succeeds:"
 	set parent [submit_job "-M$c2 --array=1-2" $c2 $file_in_short]
 	set child [submit_job "--depend=aftercorr:$parent -M$c1 \
 		--array=1-2" $c1 $file_in_long]
 	# The dependency on the remote side has _*, but the dependency locally
 	# doesn't because it couldn't find the remote job.
 	set unfulfilled "aftercorr:${parent}(unfulfilled)"

 	foreach t $tasks {
 		my_wait_for_fed_job "${parent}_$t" "RUNNING" $c2
 	}
 	foreach t $tasks {
 		my_wait_for_fed_job "${child}_$t" "PENDING" $c1
 	}
 	foreach t $tasks {
 		wait_for_depend "${child}_$t" "Dependency" $unfulfilled
 	}

 	foreach t $tasks {
 		my_wait_for_fed_job "${parent}_$t" "DONE" $c2
 	}
 	foreach t $tasks {
 		wait_for_depend "${child}_$t" "None" "(null)"
 	}
 	foreach t $tasks {
 		my_wait_for_fed_job "${child}_$t" "RUNNING" $c1
 	}
 	cancel_job $child $c1

 	# Remote dependency fails
 	log_info "aftercorr: test that remote dependency fails:"
 	set parent [submit_job "-M$c2 --array=1-2" $c2 $file_in_long]
 	set child [submit_job "--depend=aftercorr:$parent -M$c1 \
 		--array=1-2" $c1 $file_in_long]
 	set unfulfilled "aftercorr:${parent}(unfulfilled)"
 	set failed "aftercorr:${parent}(failed)"

 	foreach t $tasks {
 		my_wait_for_fed_job "${parent}_$t" "RUNNING" $c2
 	}
 	foreach t $tasks {
 		my_wait_for_fed_job "${child}_$t" "PENDING" $c1
 	}
 	foreach t $tasks {
 		wait_for_depend "${child}_$t" "Dependency" $unfulfilled
 	}

 	# Cancel each remote task and check the matching local task in turn.
 	foreach t $tasks {
 		cancel_job "${parent}_$t" $c2
 		wait_for_depend "${child}_$t" "DependencyNeverSatisfied" $failed
 	}
 	if { !$kill_invalid_depend } {
 		cancel_job "$child" $c1
 	}
 }

 # "afterok" dependency with both jobs on c1: fulfilled when the first job
 # reaches DONE on its own, failed when the first job is cancelled.
 proc test_local_afterok { } {
 	global c1 kill_invalid_depend file_in_long file_in_short

 	log_info "#############################################################"
 	log_info "# Test local afterok"
 	log_info "#############################################################"

 	# Local dependency succeeds
 	log_info "afterok: test that local dependency succeeds:"
 	set parent [submit_job "-M$c1" $c1 $file_in_short]
 	set child [submit_job "--depend=afterok:$parent -M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $parent "RUNNING" $c1
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" "afterok:${parent}(unfulfilled)"
 	my_wait_for_fed_job $parent "DONE" $c1
 	wait_for_depend $child "None" "(null)"
 	my_wait_for_fed_job $child "RUNNING" $c1
 	cancel_job $child $c1

 	# Local dependency fails
 	log_info "afterok: test that local dependency fails:"
 	set parent [submit_job "-M$c1" $c1 $file_in_long]
 	set child [submit_job "--depend=afterok:$parent -M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $parent "RUNNING" $c1
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" "afterok:${parent}(unfulfilled)"
 	cancel_job $parent $c1
 	wait_for_depend $child "DependencyNeverSatisfied" "afterok:${parent}(failed)"
 	if { !$kill_invalid_depend } {
 		cancel_job $child $c1
 	}
 }

 # "afterok" dependency of a job on c1 on a job on c2: fulfilled when the
 # remote job reaches DONE on its own, failed when it is cancelled.
 proc test_remote_afterok { } {
 	global c1 c2 kill_invalid_depend file_in_long file_in_short

 	log_info "#############################################################"
 	log_info "# Test remote afterok"
 	log_info "#############################################################"

 	# Remote dependency succeeds
 	log_info "afterok: test that remote dependency succeeds:"
 	set parent [submit_job "-M$c2" $c2 $file_in_short]
 	set child [submit_job "--depend=afterok:$parent -M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $parent "RUNNING" $c2
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" "afterok:${parent}(unfulfilled)"
 	my_wait_for_fed_job $parent "DONE" $c2
 	wait_for_depend $child "None" "(null)"
 	my_wait_for_fed_job $child "RUNNING" $c1
 	cancel_job $child $c1

 	# Remote dependency fails
 	log_info "afterok: test that remote dependency fails"
 	set parent [submit_job "-M$c2" $c2 $file_in_long]
 	set child [submit_job "--depend=afterok:$parent -M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $parent "RUNNING" $c2
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" "afterok:${parent}(unfulfilled)"
 	cancel_job $parent $c2
 	wait_for_depend $child "DependencyNeverSatisfied" "afterok:${parent}(failed)"
 	if { !$kill_invalid_depend } {
 		cancel_job $child $c1
 	}
 }

 # "afternotok" dependency with both jobs on c1: fulfilled when the first job
 # is cancelled, failed when the first job reaches DONE on its own.
 proc test_local_afternotok { } {
 	global c1 kill_invalid_depend file_in_long file_in_short

 	log_info "#############################################################"
 	log_info "# Test local afternotok"
 	log_info "#############################################################"

 	# Local dependency succeeds
 	log_info "afternotok: test that local dependency succeeds:"
 	set parent [submit_job "-M$c1" $c1 $file_in_long]
 	set child [submit_job "--depend=afternotok:$parent -M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $parent "RUNNING" $c1
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" "afternotok:${parent}(unfulfilled)"
 	cancel_job $parent $c1
 	wait_for_depend $child "None" "(null)"
 	my_wait_for_fed_job $child "RUNNING" $c1
 	cancel_job $child $c1

 	# Local dependency fails
 	log_info "afternotok: test that local dependency fails:"
 	set parent [submit_job "-M$c1" $c1 $file_in_short]
 	set child [submit_job "--depend=afternotok:$parent -M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $parent "RUNNING" $c1
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" "afternotok:${parent}(unfulfilled)"
 	my_wait_for_fed_job $parent "DONE" $c1
 	wait_for_depend $child "DependencyNeverSatisfied" "afternotok:${parent}(failed)"
 	if { !$kill_invalid_depend } {
 		cancel_job $child $c1
 	}
 }

 # "afternotok" dependency of a job on c1 on a job on c2: fulfilled when the
 # remote job is cancelled, failed when it reaches DONE on its own.
 proc test_remote_afternotok { } {
 	global c1 c2 kill_invalid_depend file_in_long file_in_short

 	log_info "#############################################################"
 	log_info "# Test remote afternotok"
 	log_info "#############################################################"

 	# Remote dependency succeeds
 	log_info "afternotok: test that remote dependency succeeds:"
 	set parent [submit_job "-M$c2" $c2 $file_in_long]
 	set child [submit_job "--depend=afternotok:$parent -M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $parent "RUNNING" $c2
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" "afternotok:${parent}(unfulfilled)"
 	cancel_job $parent $c2
 	wait_for_depend $child "None" "(null)"
 	my_wait_for_fed_job $child "RUNNING" $c1
 	cancel_job $child $c1

 	# Remote dependency fails
 	log_info "afternotok: test that remote dependency fails"
 	set parent [submit_job "-M$c2" $c2 $file_in_short]
 	set child [submit_job "--depend=afternotok:$parent -M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $parent "RUNNING" $c2
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" "afternotok:${parent}(unfulfilled)"
 	my_wait_for_fed_job $parent "DONE" $c2
 	wait_for_depend $child "DependencyNeverSatisfied" "afternotok:${parent}(failed)"
 	if { !$kill_invalid_depend } {
 		cancel_job $child $c1
 	}
 }

 # "singleton" dependency with both jobs on c1: the dependent job must wait
 # until the running job is cancelled.
 proc test_local_singleton { } {
 	global c1 disable_remote_singleton file_in_long

 	log_info "#############################################################"
 	log_info "# Test local singleton"
 	log_info "#############################################################"

 	# Test one cluster
 	log_info "singleton: test on one cluster"
 	set running_job [submit_job "-M$c1" $c1 $file_in_long]
 	set singleton_job [submit_job "--depend=singleton -M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $running_job "RUNNING" $c1
 	my_wait_for_fed_job $singleton_job "PENDING" $c1
 	wait_for_depend $singleton_job "Dependency" "singleton(unfulfilled)"

 	cancel_job $running_job $c1
 	wait_for_depend $singleton_job "None" "(null)"
 	my_wait_for_fed_job $singleton_job "RUNNING" $c1
 	cancel_job $singleton_job $c1
 }

 # "singleton" dependency with jobs spread over several clusters. Which
 # scenario runs depends on the global disable_remote_singleton.
 proc test_remote_singleton { } {
 	global c1 c2 c3 disable_remote_singleton file_in_long

 	log_info "#############################################################"
 	log_info "# Test remote singleton"
 	log_info "#############################################################"

 	set blocked "singleton(unfulfilled)"

 	# Test multiple clusters
 	if { $disable_remote_singleton } {
 		# Test that remote jobs don't affect the singleton dependency
 		log_info "singleton: test that disable_remote_singleton works"
 		set local_job [submit_job "-M$c1" $c1 $file_in_long]
 		set remote_job [submit_job "-M$c2" $c2 $file_in_long]
 		set singleton_job [submit_job "--depend=singleton -M$c1" $c1 \
 			$file_in_long]

 		my_wait_for_fed_job $local_job "RUNNING" $c1
 		my_wait_for_fed_job $remote_job "RUNNING" $c2
 		my_wait_for_fed_job $singleton_job "PENDING" $c1
 		wait_for_depend $singleton_job "Dependency" $blocked

 		# Cancel the local job - the singleton job should start running
 		# even though the other job is running on another cluster
 		cancel_job $local_job $c1
 		wait_for_depend $singleton_job "None" "(null)"
 		my_wait_for_fed_job $singleton_job "RUNNING" $c1
 		cancel_job $remote_job $c2
 		cancel_job $singleton_job $c1
 	} else {
 		# Test that singleton doesn't get cleared until jobs on all
 		# clusters are done
 		log_info "singleton: test with jobs on all clusters"
 		set job_on_c1 [submit_job "-M$c1" $c1 $file_in_long]
 		set job_on_c2 [submit_job "-M$c2" $c2 $file_in_long]
 		set job_on_c3 [submit_job "-M$c3" $c3 $file_in_long]
 		set singleton_job [submit_job "--depend=singleton -M$c1" $c1 \
 			$file_in_long]

 		my_wait_for_fed_job $job_on_c1 "RUNNING" $c1
 		my_wait_for_fed_job $job_on_c2 "RUNNING" $c2
 		my_wait_for_fed_job $job_on_c3 "RUNNING" $c3
 		my_wait_for_fed_job $singleton_job "PENDING" $c1
 		wait_for_depend $singleton_job "Dependency" $blocked

 		# The singleton job shouldn't start until the other three jobs
 		# are done. Test that it starts when a remote job is finished
 		# last.
 		cancel_job $job_on_c1 $c1
 		# Should still have the same dependency
 		wait_for_depend $singleton_job "Dependency" $blocked
 		cancel_job $job_on_c2 $c2
 		cancel_job $job_on_c3 $c3
 		# Now the dependency should be cleared
 		wait_for_depend $singleton_job "None" "(null)"
 		my_wait_for_fed_job $singleton_job "RUNNING" $c1
 		cancel_job $singleton_job $c1
 	}
 }

 # Check how dependencies react when cluster c3 is removed from, and later
 # added back to, the federation fed_name.
 #
 # Submits one job per cluster plus, on c1, a job with an afterok dependency
 # on the c3 job and a job with a singleton dependency. After c3 is removed the
 # afterok dependency is expected to become "failed". Unless
 # disable_remote_singleton is set, c3 is then added back and the singleton
 # dependency is expected to clear once the three plain jobs are cancelled.
 proc test_add_remove_clusters { } {
 	global c1 c2 c3 fed_name file_in_long \
 		disable_remote_singleton kill_invalid_depend

 	# Test adding/removing clusters from the federation
 	# Removing a cluster from a federation should cause dependencies on
 	# jobs on that cluster to fail.
 	# Adding a cluster to a federation means that any singleton dependencies
 	# have to be fulfilled on that cluster.
 	log_info "#############################################################"
 	log_info "# Test adding/removing a cluster from the federation"
 	log_info "#############################################################"

 	# job_id1..3: one plain job per cluster; job_id4: afterok on the c3 job;
 	# job_id5: singleton. Both dependent jobs are submitted to c1.
 	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
 	set job_id2 [submit_job "-M$c2" $c2 $file_in_long]
 	set job_id3 [submit_job "-M$c3" $c3 $file_in_long]
 	set job_id4 [submit_job "--depend=afterok:$job_id3 -M$c1" $c1 \
 		$file_in_long]
 	set job_id5 [submit_job "--depend=singleton -M$c1" $c1 $file_in_long]
 	my_wait_for_fed_job $job_id1 "RUNNING" $c1
 	my_wait_for_fed_job $job_id2 "RUNNING" $c2
 	my_wait_for_fed_job $job_id3 "RUNNING" $c3
 	my_wait_for_fed_job $job_id4 "PENDING" $c1
 	wait_for_depend $job_id4 "Dependency" "afterok:$job_id3\(unfulfilled\)"
 	wait_for_depend $job_id5 "Dependency" "singleton(unfulfilled)"

 	log_info "Test that removing cluster $c3 from fed $fed_name makes dependencies on jobs on $c3 fail"
 	# A true result from remove_cluster_from_fed is treated as an error.
 	if [remove_cluster_from_fed $c3 $fed_name] {
 		fail "Unable to remove cluster ($c3) from federation ($fed_name)"
 	}
 	wait_for_depend $job_id4 "DependencyNeverSatisfied" \
 		"afterok:$job_id3\(failed\)"
 	if { !$kill_invalid_depend } {
 		cancel_job $job_id4 $c1
 	}

 	# NOTE(review): this early-return path never calls add_cluster_to_fed,
 	# so $c3 stays out of $fed_name unless something outside this proc
 	# restores it - confirm that is intended.
 	if { $disable_remote_singleton } {
 		cancel_job $job_id1 $c1
 		cancel_job $job_id2 $c2
 		cancel_job $job_id3 $c3
 		cancel_job $job_id5 $c1
 		return
 	}

 	log_info "Test that the singleton dependency was resent back to cluster $c3 when it was added back to the federation."
 	# A true result from add_cluster_to_fed is treated as an error.
 	if [add_cluster_to_fed $c3 $fed_name] {
 		fail "Unable to add cluster ($c3) to federation ($fed_name)"
 	}
 	cancel_job $job_id1 $c1
 	cancel_job $job_id2 $c2
 	cancel_job $job_id3 $c3
 	wait_for_depend $job_id5 "None" "(null)"
 	my_wait_for_fed_job $job_id5 "RUNNING" $c1
 	cancel_job $job_id5 $c1
 }

 # A dependent job submitted with -M listing all three clusters must exist
 # only on c1 while it is dependent and must be pending on every cluster once
 # the dependency has cleared.
 proc test_submit_to_all_clusters { } {
 	global c1 c2 c3 file_in_long

 	log_info "#############################################################"
 	log_info "# Test submitting a dependent job to all clusters"
 	log_info "#############################################################"

 	log_info "Test that a dependent job is only on its origin cluster while dependent and that it is submitted to all clusters when its dependency is cleared."
 	set parent [submit_job "-M$c2" $c2 $file_in_long]
 	set child [submit_job "--depend=afternotok:$parent -M$c1,$c2,$c3 \
 		--begin=now+60" $c1 $file_in_long]

 	my_wait_for_fed_job $parent "RUNNING" $c2
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" "afternotok:${parent}(unfulfilled)"

 	log_info "Test that job $child is not on clusters $c2 or $c3."
 	if { [is_job_on_cluster $child $c2] || [is_job_on_cluster $child $c3] } {
 		fail "Job ($child) is in cluster ($c2 and/or $c3) when it shouldn't be."
 	}

 	log_info "Test that job $child is submitted to all sibling clusters $c2 and $c3 when its dependency is fulfilled."
 	cancel_job $parent $c2
 	wait_for_depend $child "BeginTime" "(null)"
 	foreach cluster [list $c1 $c2 $c3] {
 		my_wait_for_fed_job $child "PENDING" "$cluster"
 	}
 	cancel_job $child "$c1,$c2,$c3"
 }

 # OR ("?") dependencies with all jobs on c1: one fulfilled alternative
 # clears the whole dependency, and the dependency only becomes
 # DependencyNeverSatisfied after every alternative has failed.
 proc test_local_or_dependencies { } {
 	global c1 file_in_long kill_invalid_depend

 	log_info "#############################################################"
 	log_info "# Test local OR dependencies"
 	log_info "#############################################################"

 	log_info "OR dependencies: Test that one fulfilled dependency makes the whole dependency fulfilled:"
 	set first [submit_job "-M$c1" $c1 $file_in_long]
 	set second [submit_job "-M$c1" $c1 $file_in_long]
 	set child [submit_job \
 		"--depend=afternotok:$first?afternotok:$second -M$c1" \
 		$c1 $file_in_long]

 	my_wait_for_fed_job $first "RUNNING" $c1
 	my_wait_for_fed_job $second "RUNNING" $c1
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" \
 		"afternotok:${first}(unfulfilled)?afternotok:${second}(unfulfilled)"

 	cancel_job $second $c1
 	wait_for_depend $child "None" "(null)"
 	my_wait_for_fed_job $child "RUNNING" $c1
 	cancel_job $first $c1
 	cancel_job $child $c1

 	log_info "OR dependencies: Test that the dependency doesn't fail until all dependencies have failed:"
 	set first [submit_job "-M$c1" $c1 $file_in_long]
 	set second [submit_job "-M$c1" $c1 $file_in_long]
 	set child [submit_job "--depend=afterok:$first?afterok:$second \
 		-M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $first "RUNNING" $c1
 	my_wait_for_fed_job $second "RUNNING" $c1
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" \
 		"afterok:${first}(unfulfilled)?afterok:${second}(unfulfilled)"

 	cancel_job $first $c1
 	wait_for_depend $child "Dependency" \
 		"afterok:${first}(failed)?afterok:${second}(unfulfilled)"
 	cancel_job $second $c1
 	wait_for_depend $child "DependencyNeverSatisfied" \
 		"afterok:${first}(failed)?afterok:${second}(failed)"
 	if { !$kill_invalid_depend } {
 		cancel_job $child $c1
 	}
 }

 # OR ("?") dependencies where one alternative is a job on c1 and the other a
 # job on c2: one fulfilled alternative clears the whole dependency, and the
 # dependency only becomes DependencyNeverSatisfied after both have failed.
 proc test_remote_or_dependencies { } {
 	global c1 c2 file_in_long kill_invalid_depend

 	log_info "#############################################################"
 	log_info "# Test remote OR dependencies"
 	log_info "#############################################################"

 	log_info "OR dependencies: Test that one fulfilled dependency makes the whole dependency fulfilled:"
 	set local_job [submit_job "-M$c1" $c1 $file_in_long]
 	set remote_job [submit_job "-M$c2" $c2 $file_in_long]
 	set child [submit_job \
 		"--depend=afternotok:$local_job?afternotok:$remote_job -M$c1" \
 		$c1 $file_in_long]

 	my_wait_for_fed_job $local_job "RUNNING" $c1
 	my_wait_for_fed_job $remote_job "RUNNING" $c2
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" \
 		"afternotok:${local_job}(unfulfilled)?afternotok:${remote_job}(unfulfilled)"

 	cancel_job $remote_job $c2
 	wait_for_depend $child "None" "(null)"
 	my_wait_for_fed_job $child "RUNNING" $c1
 	cancel_job $local_job $c1
 	cancel_job $child $c1

 	log_info "OR dependencies: Test that the dependency doesn't fail until all dependencies have failed:"
 	set local_job [submit_job "-M$c1" $c1 $file_in_long]
 	set remote_job [submit_job "-M$c2" $c2 $file_in_long]
 	set child [submit_job "--depend=afterok:$local_job?afterok:$remote_job \
 		-M$c1" $c1 $file_in_long]

 	my_wait_for_fed_job $local_job "RUNNING" $c1
 	my_wait_for_fed_job $remote_job "RUNNING" $c2
 	my_wait_for_fed_job $child "PENDING" $c1
 	wait_for_depend $child "Dependency" \
 		"afterok:${local_job}(unfulfilled)?afterok:${remote_job}(unfulfilled)"

 	cancel_job $local_job $c1
 	wait_for_depend $child "Dependency" \
 		"afterok:${local_job}(failed)?afterok:${remote_job}(unfulfilled)"
 	cancel_job $remote_job $c2
 	wait_for_depend $child "DependencyNeverSatisfied" \
 		"afterok:${local_job}(failed)?afterok:${remote_job}(failed)"
 	if { !$kill_invalid_depend } {
 		cancel_job $child $c1
 	}
 }

 # test_local_and_dependencies - exercise AND (","-separated) dependencies
 # where every job involved is submitted to cluster c1.
 #
 # Two scenarios are run back to back:
 #   1. afternotok,afternotok - the dependent job is expected to stay held
 #      until both prerequisites have been cancelled, with the satisfied
 #      term dropping out of the reported dependency string in between.
 #   2. afterok,afterok - cancelling a single prerequisite is expected to
 #      make the whole dependency "DependencyNeverSatisfied".
 #
 # Globals read: c1, file_in_long, kill_invalid_depend.
 proc test_local_and_dependencies { } {
 	global c1 file_in_long kill_invalid_depend

 	set banner "#############################################################"
 	log_info $banner
 	log_info "# Test local AND dependencies"
 	log_info $banner

 	# --- Scenario 1: every term must be satisfied before the job runs ---
 	log_info "AND dependencies: Test that the dependency isn't fulfilled until all dependencies are fulfilled:"
 	set first_job  [submit_job "-M$c1" $c1 $file_in_long]
 	set second_job [submit_job "-M$c1" $c1 $file_in_long]
 	set dep_spec "afternotok:$first_job,afternotok:$second_job"
 	set dependent [submit_job "--depend=$dep_spec -M$c1" $c1 $file_in_long]

 	# Both prerequisites must be running and the dependent job held before
 	# the dependency string is inspected.
 	foreach {job state} [list \
 		$first_job  "RUNNING" \
 		$second_job "RUNNING" \
 		$dependent  "PENDING"] {
 		my_wait_for_fed_job $job $state $c1
 	}
 	wait_for_depend $dependent "Dependency" \
 		"afternotok:${first_job}(unfulfilled),afternotok:${second_job}(unfulfilled)"

 	# After the first cancel only the second term is expected to remain.
 	cancel_job $first_job $c1
 	wait_for_depend $dependent "Dependency" \
 		"afternotok:${second_job}(unfulfilled)"

 	# After the second cancel no dependency is left and the job should run.
 	cancel_job $second_job $c1
 	wait_for_depend $dependent "None" "(null)"
 	my_wait_for_fed_job $dependent "RUNNING" $c1
 	cancel_job $dependent $c1

 	# --- Scenario 2: one failed term fails the whole AND ---
 	log_info "AND dependencies: Test that the whole dependency fails when a single dependency fails:"
 	set first_job  [submit_job "-M$c1" $c1 $file_in_long]
 	set second_job [submit_job "-M$c1" $c1 $file_in_long]
 	set dep_spec "afterok:$first_job,afterok:$second_job"
 	set dependent [submit_job "--depend=$dep_spec \
 		-M$c1" $c1 $file_in_long]

 	foreach {job state} [list \
 		$first_job  "RUNNING" \
 		$second_job "RUNNING" \
 		$dependent  "PENDING"] {
 		my_wait_for_fed_job $job $state $c1
 	}
 	wait_for_depend $dependent "Dependency" \
 		"afterok:${first_job}(unfulfilled),afterok:${second_job}(unfulfilled)"

 	# Cancel only the second prerequisite; the first is still running, so
 	# its term is expected to remain "unfulfilled".
 	cancel_job $second_job $c1
 	wait_for_depend $dependent "DependencyNeverSatisfied" \
 		"afterok:${first_job}(unfulfilled),afterok:${second_job}(failed)"

 	# The dependent job is only cancelled by hand when kill_invalid_depend
 	# is off (presumably the controller removes it itself otherwise).
 	if {!$kill_invalid_depend} {
 		cancel_job $dependent $c1
 	}
 	# The first prerequisite was never cancelled above; clean it up.
 	cancel_job $first_job $c1
 }

 # test_remote_and_dependencies - exercise AND (","-separated) dependencies
 # whose prerequisite jobs run on two different clusters.
 #
 # One prerequisite is submitted to c1 and the other to c2; the dependent job
 # is always submitted to c1. Two scenarios are run back to back:
 #   1. afternotok,afternotok - the dependent job is expected to stay held
 #      until both prerequisites have been cancelled, with the satisfied
 #      term dropping out of the reported dependency string in between.
 #   2. afterok,afterok - cancelling only the c2 prerequisite is expected to
 #      make the whole dependency "DependencyNeverSatisfied".
 #
 # Globals read: c1, c2, file_in_long, kill_invalid_depend.
 proc test_remote_and_dependencies { } {
 	global c1 c2 file_in_long kill_invalid_depend

 	set banner "#############################################################"
 	log_info $banner
 	log_info "# Test remote AND dependencies"
 	log_info $banner

 	# --- Scenario 1: every term must be satisfied before the job runs ---
 	log_info "AND dependencies: Test that the dependency isn't fulfilled until all dependencies are fulfilled:"
 	set local_job  [submit_job "-M$c1" $c1 $file_in_long]
 	set remote_job [submit_job "-M$c2" $c2 $file_in_long]
 	set dep_spec "afternotok:$local_job,afternotok:$remote_job"
 	set dependent [submit_job "--depend=$dep_spec -M$c1" $c1 $file_in_long]

 	# Both prerequisites must be running and the dependent job held before
 	# the dependency string is inspected.
 	foreach {job state cluster} [list \
 		$local_job  "RUNNING" $c1 \
 		$remote_job "RUNNING" $c2 \
 		$dependent  "PENDING" $c1] {
 		my_wait_for_fed_job $job $state $cluster
 	}
 	wait_for_depend $dependent "Dependency" \
 		"afternotok:${local_job}(unfulfilled),afternotok:${remote_job}(unfulfilled)"

 	# After cancelling the local prerequisite only the remote term is
 	# expected to remain.
 	cancel_job $local_job $c1
 	wait_for_depend $dependent "Dependency" \
 		"afternotok:${remote_job}(unfulfilled)"

 	# After cancelling the remote one no dependency is left and the job
 	# should run.
 	cancel_job $remote_job $c2
 	wait_for_depend $dependent "None" "(null)"
 	my_wait_for_fed_job $dependent "RUNNING" $c1
 	cancel_job $dependent $c1

 	# --- Scenario 2: one failed term fails the whole AND ---
 	log_info "AND dependencies: Test that the whole dependency fails when a single dependency fails:"
 	set local_job  [submit_job "-M$c1" $c1 $file_in_long]
 	set remote_job [submit_job "-M$c2" $c2 $file_in_long]
 	set dep_spec "afterok:$local_job,afterok:$remote_job"
 	set dependent [submit_job "--depend=$dep_spec \
 		-M$c1" $c1 $file_in_long]

 	foreach {job state cluster} [list \
 		$local_job  "RUNNING" $c1 \
 		$remote_job "RUNNING" $c2 \
 		$dependent  "PENDING" $c1] {
 		my_wait_for_fed_job $job $state $cluster
 	}
 	wait_for_depend $dependent "Dependency" \
 		"afterok:${local_job}(unfulfilled),afterok:${remote_job}(unfulfilled)"

 	# Cancel only the remote prerequisite; the local one is still running,
 	# so its term is expected to remain "unfulfilled".
 	cancel_job $remote_job $c2
 	wait_for_depend $dependent "DependencyNeverSatisfied" \
 		"afterok:${local_job}(unfulfilled),afterok:${remote_job}(failed)"

 	# The dependent job is only cancelled by hand when kill_invalid_depend
 	# is off (presumably the controller removes it itself otherwise).
 	if {!$kill_invalid_depend} {
 		cancel_job $dependent $c1
 	}
 	# The local prerequisite was never cancelled above; clean it up.
 	cancel_job $local_job $c1
 }

 # Prerequisite: accounting must be backed by slurmdbd, otherwise the whole
 # test is skipped.
 if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
 	skip "This test can't be run without a usable AccountingStorageType"
 }

 # When a federation test environment is configured, require every cluster
 # to be up and (re)create the test federation from scratch. Otherwise fall
 # back to the single local cluster so that the local subtests can still run.
 if {[check_federation_setup]} {
 	if {![check_federation_up]} {
 		skip "All of the clusters in the federation must be up"
 	}

 	# Remove any leftover federation of the same name before setting it up.
 	delete_federations $fed_name
 	# A true result from setup_federation is treated as failure here.
 	if {[setup_federation $fed_name]} {
 		fail "Unable to set up federation ($fed_name)"
 	}
 } else {
 	log_warn "Not running remote dependency tests"
 	# Point c1 at the local cluster and use the plain scancel/scontrol
 	# binaries. NOTE(review): all_clusters, my_scancel and my_scontrol are
 	# presumably consumed by the federation helper procs - confirm.
 	set c1 [get_config_param "ClusterName"]
 	set all_clusters "$c1"
 	set my_scancel $scancel
 	set my_scontrol $scontrol
 }

 # cleanup - undo what the test set up: delete the test federation, cancel
 # any federation jobs, and remove the two generated job scripts.
 proc cleanup {} {
 	global fed_name
 	global bin_rm
 	global file_in_long file_in_short

 	# Federation teardown first, then job cancellation (order kept as-is).
 	delete_federations $fed_name
 	cancel_federation_jobs

 	# -f: do not complain if a script was never created.
 	exec $bin_rm -f $file_in_long $file_in_short
 }

 ###############################################################################
 # Begin test
 ###############################################################################

 # Two job scripts are generated: file_in_short (5 second sleep) for subtests
 # that have to wait for a job to end, and file_in_long (60 second sleep) for
 # everything else.
 make_bash_script $file_in_long "$bin_sleep 60"
 make_bash_script $file_in_short "$bin_sleep 5"

 # Record which DependencyParameters options are active; the subtests read
 # these flags as globals.
 set kill_invalid_depend [param_contains \
 	[get_config_param "DependencyParameters"] "kill_invalid_depend"]
 set disable_remote_singleton [param_contains \
 	[get_config_param "DependencyParameters"] "disable_remote_singleton"]
 log_info "kill_invalid_depend: $kill_invalid_depend; disable_remote_singleton: $disable_remote_singleton\n"

 # Start from a clean slate.
 cancel_federation_jobs

 # Local subtests, run in this order.
 # --depend=afterburstbuffer is tested in test35.6
 # Test --depend=expand in another test
 foreach local_subtest {
 	test_local_after
 	test_local_afterany
 	test_local_aftercorr
 	test_local_afterok
 	test_local_afternotok
 	test_local_singleton
 	test_local_or_dependencies
 	test_local_and_dependencies
 } {
 	$local_subtest
 }

 # Remote subtests need a federation setup; without one the rest is skipped.
 if {[check_federation_setup]} {
 	foreach remote_subtest {
 		test_remote_after
 		test_remote_afterany
 		test_remote_aftercorr
 		test_remote_afterok
 		test_remote_afternotok
 		test_remote_singleton
 		test_remote_or_dependencies
 		test_remote_and_dependencies
 		test_add_remove_clusters
 		test_submit_to_all_clusters
 	} {
 		$remote_subtest
 	}
 } else {
 	skip "Subtests with federations cannot be run"
 }