| #!/usr/bin/env expect |
| ############################################################################ |
| # Purpose: Stress test of per-task input files |
| ############################################################################ |
| # Copyright (C) 2002-2007 The Regents of the University of California. |
| # Copyright (C) 2008-2010 Lawrence Livermore National Security. |
| # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| # Written by Morris Jette <jette1@llnl.gov> |
| # CODE-OCEC-09-009. All rights reserved. |
| # |
| # This file is part of Slurm, a resource management program. |
| # For details, see <https://slurm.schedmd.com/>. |
| # Please also read the included file: DISCLAIMER. |
| # |
| # Slurm is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with Slurm; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| source ./globals |
| |
| # Shared input file, per-task input file pattern (the %t is handed to srun |
| # unexpanded), job output file, and job name |
| set file_in      "${test_dir}/input" |
| set file_in_task "${test_name}.%t.input" |
| set file_out     "${test_dir}/output" |
| set job_name     ${test_name} |
| |
| # Number of test cycles, job geometry, and extra srun options |
| set cycle_count  [get_cycle_count] |
| set task_cnt     ${max_stress_tasks} |
| set node_cnt     "1-4" |
| set other_opts   {-O} |
| |
| # Remove the per-task input files, named "<test_name>.<task>.input", for |
| # every task index from 0 up to (but not including) task_cnt. |
| proc cleanup {} { |
|     global task_cnt test_name |
| |
|     set task_id 0 |
|     while {$task_id < $task_cnt} { |
|         file delete "${test_name}.${task_id}.input" |
|         incr task_id |
|     } |
| } |
| |
| # Execute an srun job that runs cat on its standard input, using task_cnt |
| # tasks over node_cnt nodes, then wait for the output file to show up. |
| # |
| # input_file  - value given to srun -i (source of each task's stdin) |
| # output_file - value given to srun -o; any existing file is removed first |
| # |
| # Returns the result of wait_for_file on output_file |
| # (0 on successful completion, 1 otherwise) |
| proc run_cat_job { input_file output_file } { |
|     # timeout is the variable expect consults for its timeout branch |
|     global bin_cat bin_rm job_name srun node_cnt other_opts task_cnt timeout |
| |
|     # Remove output left by a previous cycle before starting the job |
|     exec $bin_rm -f $output_file |
| |
|     spawn $srun --job-name=$job_name -e - -i $input_file -o $output_file -N$node_cnt -n$task_cnt $other_opts -t1 $bin_cat - |
|     expect { |
|         -re "Unable to contact" { |
|             fail "Slurm appears to be down" |
|         } |
|         timeout { |
|             fail "srun not responding" |
|         } |
|         eof { |
|             # Reap the finished srun process |
|             wait |
|         } |
|     } |
|     return [wait_for_file $output_file] |
| } |
| |
| # |
| # Create a sizable text file from /etc/hosts followed by /etc/passwd |
| # |
| exec $bin_cat /etc/hosts >$file_in |
| exec $bin_cat /etc/passwd >>$file_in |
| set stdin_lines [get_line_cnt $file_in] |
| # Expected output size: the input line count multiplied by the task count. |
| # The expression is braced so expr performs the substitution itself, |
| # exactly once. |
| set stdout_target [expr {$stdin_lines * $task_cnt}] |
| |
| # Produce one input file per task: run cat under srun with the shared input |
| # file as stdin and the per-task file pattern as the output destination |
| set timeout $max_job_delay |
| spawn $srun -e /dev/null -i $file_in -o $file_in_task -N$node_cnt -n$task_cnt $other_opts -t1 $bin_cat |
| expect { |
|     -re "Unable to contact" { fail "Slurm appears to be down" } |
|     eof                     { wait } |
|     timeout                 { fail "srun not responding" } |
| } |
| |
| # |
| # Repeat the cat job cycle_count times; each pass must yield exactly |
| # stdout_target lines in the output file to count as a success |
| # |
| set success_cnt 0 |
| set inx 0 |
| while {$inx < $cycle_count} { |
|     if {[run_cat_job $file_in_task $file_out]} { |
|         fail "Unable to run cat job" |
|     } |
| |
|     # On a mismatch, pause one second and count the lines once more |
|     set stdout_lines [get_line_cnt $file_out] |
|     if {$stdout_lines != $stdout_target} { |
|         exec $bin_sleep 1 |
|         set stdout_lines [get_line_cnt $file_out] |
|     } |
| |
|     if {$stdout_lines == $stdout_target} { |
|         incr success_cnt |
|     } elseif {$stdout_lines == 0} { |
|         fail "stdout is empty. Is current working directory writable from compute nodes?" |
|     } else { |
|         fail "stdout is incomplete" |
|     } |
|     incr inx |
| } |