| #!/usr/bin/env expect |
| ############################################################################ |
| # Purpose: Test of SPANK plugin with hetjobs |
| ############################################################################ |
| # Copyright (C) SchedMD LLC. |
| # |
| # This file is part of Slurm, a resource management program. |
| # For details, see <https://slurm.schedmd.com/>. |
| # Please also read the included file: DISCLAIMER. |
| # |
| # Slurm is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with Slurm; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| source ./globals |
| |
| # |
| # Per-test paths. Job input/output and the plugin's log live under $test_dir; |
| # $file_prog is the plugin's base path (extension added where it is used). |
| # |
| set cwd [$bin_pwd] |
| set file_prog "$cwd/${test_name}.prog" |
| set file_in $test_dir/input |
| set file_out $test_dir/output |
| set spank_out $test_dir/spank.out |
| set orig_spank_conf $test_dir/orig_conf |
| set new_spank_conf $test_dir/new_conf |
| |
| # 0 means "no job submitted yet" |
| set job_id 0 |
| |
| # |
| # Prerequisites, checked in this order: privileged user first, then the |
| # backfill scheduler. |
| # |
| if {![is_super_user]} { |
|     skip "This test must be run as SlurmUser" |
| } |
| set scheduler_type [get_config_param "SchedulerType"] |
| if {$scheduler_type ne "sched/backfill"} { |
|     skip "This test requires SchedulerType = sched/backfill" |
| } |
| |
| # |
| # cleanup - test teardown. |
| # |
| # Cancels the test job, restores the saved plugstack.conf and deletes the |
| # compiled plugin (${file_prog}.so). Takes no arguments; works on the |
| # script-level globals job_id, spank_conf_file and file_prog. |
| # |
| # spank_conf_file is only assigned once the script reaches its plugstack.conf |
| # setup, which comes after some of the skip checks. If teardown runs before |
| # that point (presumably the harness calls cleanup on skip/fail - confirm in |
| # globals), reading the unset global would raise "no such variable", so the |
| # restore is only attempted when the variable exists. |
| # |
| proc cleanup {} { |
|     global job_id file_prog spank_conf_file |
| |
|     cancel_job $job_id |
| |
|     # Restore the original plugstack, but only if this run got as far as |
|     # locating it |
|     if {[info exists spank_conf_file]} { |
|         restore_conf $spank_conf_file |
|     } |
| |
|     # Tcl's "file delete" does not complain about a missing file, so this |
|     # is safe even when the plugin was never built |
|     file delete ${file_prog}.so |
| } |
| |
| # |
| # The het job submitted by this test has two components, so require at |
| # least two nodes in a usable state |
| # |
| set node_count [llength [get_nodes_by_state idle,alloc,comp]] |
| if {$node_count < 2} { |
|     skip "Insufficient node count to run test" |
| } |
| |
| # |
| # Build the plugin |
| # |
| exec $bin_rm -f ${file_prog}.so |
| exec $bin_cc -fPIC -shared -I${slurm_dir}/include -o ${file_prog}.so ${file_prog}.c |
| |
| # |
| # The plugin is compiled against the local Slurm headers, so the local |
| # version must match the one reported by slurmctld. Fail with a clear |
| # message if the local version cannot be parsed, rather than tripping over |
| # an unset variable in the comparison below. |
| # |
| set output [run_command_output -fail -nolog "$scontrol -V"] |
| if {![regexp "slurm ($re_word_str)" $output - loc_slurm_ver]} { |
|     fail "Unable to determine local Slurm version from '$scontrol -V' output ($output)" |
| } |
| |
| # Versions are strings: compare with ne so values that happen to look |
| # numeric (e.g. "23.2" vs "23.20") are never treated as equal |
| set ctld_slurm_ver [get_config_param "SLURM_VERSION"] |
| if {$ctld_slurm_ver ne $loc_slurm_ver} { |
|     skip "Slurmctld ($ctld_slurm_ver) and local Slurm ($loc_slurm_ver) versions are not the same, can not continue" |
| } |
| |
| # |
| # plugstack.conf is expected in the configuration directory (the same one |
| # that holds slurm.conf). Save the current file so it can be restored later, |
| # then append a "required" entry naming the freshly built plugin, followed |
| # by $spank_out. |
| # |
| set plugstack_dir [get_conf_path] |
| set spank_conf_file "$plugstack_dir/plugstack.conf" |
| save_conf $spank_conf_file |
| run_command -fail "$bin_echo 'required ${file_prog}.so ${spank_out}' >> $spank_conf_file" |
| |
| # |
| # Test of locally logged messages. |
| # Add sleep for NFS delays in propagating $spank_conf_file |
| # |
| # NOTE: This test will fail if plugstack.conf is not in a shared location |
| # The login node (where test is started) will modify the file and the |
| # slurmd/slurmstepd on the compute node will not get the updated |
| # configuration. |
| # |
| exec $bin_sleep 30 |
| log_info "Test locally logged messages........." |
| |
| # |
| # Log the checksum of plugstack.conf as seen from this node. |
| # |
| # "timeout" must be the bare Expect keyword here: written as "-re timeout" |
| # it is an ordinary regular expression that only matches the literal text |
| # "timeout" in the command's output, so a real timeout would go unnoticed. |
| # |
| log_debug "Checksum of local $spank_conf_file" |
| spawn $bin_sum $spank_conf_file |
| expect { |
|     timeout { |
|         fail "$bin_sum not responding" |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # |
| # Batch script: one srun step spanning both het job components |
| # (--het-group=0,1) that checksums plugstack.conf from inside the job, with |
| # the plugin's srun option set to 5. |
| # |
| make_bash_script $file_in " |
| $srun --mpi=none --test_suite_srun=5 --het-group=0,1 $bin_sum $spank_conf_file |
| " |
| |
| # |
| # Submit a two-component het job (-n1 : -n1) with the plugin's sbatch option |
| # set to 4. sbatch's own output is scanned for the job id and for the |
| # plugin's option-processing and exit log lines, which are counted in |
| # $matches. |
| # |
| # "timeout" must be the bare Expect keyword: written as "-re timeout" it is |
| # an ordinary regular expression matching the literal text "timeout", and a |
| # real timeout would fall through unhandled. |
| # |
| set matches 0 |
| spawn $sbatch --test_suite_sbatch=4 -t1 -o $file_out -n1 : -n1 $file_in |
| expect { |
|     -re "_test_opt_process_sbatch: opt_arg_sbatch=4" { |
|         incr matches |
|         exp_continue |
|     } |
|     -re "Submitted batch job ($number)" { |
|         set job_id $expect_out(1,string) |
|         exp_continue |
|     } |
|     -re "slurm_spank_exit: opt_arg_sbatch=4 opt_arg_srun=0" { |
|         incr matches |
|         exp_continue |
|     } |
|     timeout { |
|         fail "sbatch not responding" |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # job_id keeps its initial value of 0 when no "Submitted batch job" line |
| # was seen |
| if {$job_id == 0} { |
|     fail "Batch submit failure" |
| } |
| if {$matches != 3} { |
|     fail "Spank options not processed by sbatch ($matches != 3)" |
| } |
| |
| # |
| # Once the job is DONE, inspect its output file for the messages the plugin |
| # logged in the local (sbatch/srun) context. |
| # |
| wait_for_job -fail $job_id "DONE" |
| |
| # NOTE: spank logs from sbatch and srun would be intermingled here |
| wait_for_file -fail $file_out |
| |
| # Reset the counters: option processing per command, plus everything else |
| foreach counter {matches matches_sbatch matches_srun} { |
|     set $counter 0 |
| } |
| |
| spawn $bin_cat $file_out |
| expect { |
|     -re "error" { |
|         fail "Some error happened" |
|     } |
|     -re "_test_opt_process_(.*?=$number)" { |
|         # Only the two option values this test passed are counted; |
|         # any other option-processing line is ignored |
|         switch -exact -- $expect_out(1,string) { |
|             "sbatch: opt_arg_sbatch=4" { |
|                 incr matches_sbatch |
|             } |
|             "srun: opt_arg_srun=5" { |
|                 incr matches_srun |
|             } |
|         } |
|         exp_continue |
|     } |
|     -re "slurm_spank_local_user_init" { |
|         incr matches |
|         exp_continue |
|     } |
|     -re "slurm_spank_exit: opt_arg_sbatch=4 opt_arg_srun=5" { |
|         incr matches |
|         exp_continue |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # Appended to every failure message below as a troubleshooting hint |
| set failure_addendum "This may be due to NFS synchronization problems: Multiple processes on different nodes writing to the same file. The SlurmdLogFile on each node should include SPANK logs for each step" |
| |
| if {$matches_sbatch != 2} { |
|     fail "Local (sbatch) sbatch spank plugin failure ($matches_sbatch != 2). $failure_addendum" |
| } |
| if {$matches_srun != 2} { |
|     fail "Local (srun) srun spank plugin failure ($matches_srun != 2). $failure_addendum" |
| } |
| if {$matches != 2} { |
|     fail "Local (srun) spank plugin failure ($matches != 2). $failure_addendum" |
| } |
| log_debug "Local (srun) spank plugin success" |
| |
| # |
| # Inspect the file named on the plugin's plugstack.conf line ($spank_out) |
| # for the messages logged on the slurmd side. The file is read through |
| # sort. |
| # |
| wait_for_file -fail $spank_out |
| |
| # Reset every counter used by the scan below |
| foreach counter {matches matches_sbatch matches_srun matches_spank_exit matches_spank_init} { |
|     set $counter 0 |
| } |
| |
| spawn $bin_sort $spank_out |
| expect { |
|     -re "slurm_spank_(\\S+): opt_arg_sbatch=($number) opt_arg_srun=($number)" { |
|         set hook $expect_out(1,string) |
|         set from_test_sbatch [expr {$expect_out(2,string) eq "4"}] |
|         set srun_value $expect_out(3,string) |
| |
|         # Count by hook: every task_init line, but exit lines only |
|         # when they carry this test's sbatch option |
|         if {$hook eq "task_init"} { |
|             incr matches_spank_init |
|         } elseif {$from_test_sbatch && ($hook eq "exit")} { |
|             # Skip (possible) external job containers |
|             incr matches_spank_exit |
|         } |
| |
|         # Count by option pair: srun option 0 is tallied as sbatch, |
|         # srun option 5 as srun; both need sbatch option 4 |
|         if {$from_test_sbatch} { |
|             switch -exact -- $srun_value { |
|                 0 { |
|                     incr matches_sbatch |
|                 } |
|                 5 { |
|                     incr matches_srun |
|                 } |
|             } |
|         } |
|         exp_continue |
|     } |
|     -re "spank_get_item: argv" { |
|         incr matches |
|         exp_continue |
|     } |
|     -re "spank_get_item: my_uid=" { |
|         incr matches |
|         exp_continue |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # Appended to every failure message below as a troubleshooting hint |
| set failure_addendum "Check for matching checksums on the plugstack.conf file. Different checksums could indicate file system delays" |
| |
| if {$matches_spank_exit != 3} { |
|     fail "Remote (slurmd ) spank_exit spank plugin failure ($matches_spank_exit != 3). $failure_addendum" |
| } |
| if {$matches_spank_init != 3} { |
|     fail "Remote (slurmd) spank_init spank plugin failure ($matches_spank_init != 3). $failure_addendum" |
| } |
| if {$matches_sbatch != 2} { |
|     fail "Remote (slurmd) sbatch spank plugin failure ($matches_sbatch != 2). $failure_addendum" |
| } |
| if {$matches_srun != 4} { |
|     fail "Remote (slurmd) srun spank plugin failure ($matches_srun != 4). $failure_addendum" |
| } |
| if {$matches != 8} { |
|     fail "Remote (slurmd) spank plugin failure ($matches != 8). $failure_addendum" |
| } |
| log_info "Remote (slurmd) spank plugin success" |