blob: dea5a1b515dc316ceddc79a1e78d01cc0e01a986 [file] [log] [blame]
#!/usr/bin/env expect
############################################################################
# Purpose: Test federation functionality
# Test that submissions to clusters in federation get unique job id.
#
# Reqs: 1. Using slurmdbd accounting storage type and is up
# 2. fed_slurm_base is defined in globals.local - set to a directory that
# has access to each federation cluster's configuration (fedc1, fedc2, fedc3).
# Eg.
# fedr/slurm/ (src)
# fedr/fed1/bin
# fedr/fed1/sbin
# fedr/fed1/etc
# fedr/fed1/...
# fedr/fed2/...
# fedr/fed3/...
# 3. controllers are up and running.
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
source ./globals_accounting
source ./globals_federation
# Name of the federation this test creates and removes.
set fed_name "feda"
# Seconds that expect waits for output before its timeout branch fires.
set timeout 5

#
# Check accounting config and bail if not found.
#
# The test requires the slurmdbd accounting storage plugin (see Reqs above).
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	skip "This test can't be run without a usable AccountingStorageType"
}
# The test also requires the invoking user to be an accounting administrator.
if {[get_admin_level] ne "Administrator"} {
	skip "This test can't be run without being an Accounting administrator. Use: sacctmgr mod user \$USER set admin=admin"
}
# Test teardown: delete the federation named by the global fed_name.
# Returns whatever delete_federations returns.
proc cleanup {} {
	delete_federations $::fed_name
}
# Submit a job to a cluster and ensure that it returns a federated job id.
# A federated job id is cluster_id + local job id:
#   local_id:   bits 0-25
#   cluster_id: bits 26-31
#
# IN:  cname - cluster name; selects ${fed_slurm_base}/<cname>/bin/sbatch
# IN:  cid   - cluster id expected to be encoded in the returned job id
# RET: 0 on success, otherwise the number of failed checks
proc test_fed_job_id { cname cid } {
	global fed_slurm_base number

	set job_id 0
	set rc 0
	# Largest value representable in the local id bits (0-25)
	set max_local_id [expr {0x3FFFFFF}]

	set my_sbatch "${fed_slurm_base}/$cname/bin/sbatch"
	spawn $my_sbatch --wrap="hostname" -o/dev/null
	expect {
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		-re "error" {
			log_error "Job was not submitted"
			incr rc
		}
		timeout {
			log_error "sbatch is not responding"
			incr rc
		}
		eof {
			wait
		}
	}

	# A job id that fits entirely in the local id bits carries no cluster id
	if {$job_id <= $max_local_id} {
		log_error "$cname didn't give back a federation jobid ($job_id)"
		incr rc
	}

	# Split the job id into its local id and cluster id parts
	set local_id [expr {$job_id & $max_local_id}]
	set clust_id [expr {$job_id >> 26}]
	log_debug "Fed JobID:$job_id Local JobID:$local_id Cluster ID:$clust_id"

	# Only compare cluster ids when everything before this point succeeded
	if {!$rc && ($clust_id != $cid)} {
		log_error "Jobid($job_id) from $cname didn't give correct cluster id ($clust_id != $cid)"
		incr rc
	}

	return $rc
}
# Submit a job to a non-federated cluster and make sure the job id it
# reports is not a federated job id, i.e. it fits in bits 0-25.
#
# IN:  cname - cluster name; selects ${fed_slurm_base}/<cname>/bin/sbatch
# RET: 0 on success, otherwise the number of failed checks
proc test_loc_job_id { cname } {
	global number

	# Largest value representable in bits 0-25 (0x3FFFFFF)
	set local_id_limit [expr {(1 << 26) - 1}]
	set failures 0
	set submitted_id 0

	spawn ${::fed_slurm_base}/$cname/bin/sbatch --wrap="hostname" -o/dev/null
	expect {
		-re "Submitted batch job ($number)" {
			set submitted_id $expect_out(1,string)
			exp_continue
		}
		-re {error} {
			log_error "Job was not submitted"
			incr failures
		}
		timeout {
			log_error "sbatch is not responding"
			incr failures
		}
		eof {
			wait
		}
	}

	# Anything above the local id range is treated as a federated job id
	if {$local_id_limit < $submitted_id} {
		log_error "$cname didn't give back a non-federation job id"
		incr failures
	}

	log_debug "JobID:$submitted_id"
	return $failures
}
# Skip unless the federation test environment is configured and running.
if {![check_federation_setup]} {
	skip "This test can't be run without fed_slurm_base, fedc1, fedc2, fedc3 setup in globals.local"
}
if {![check_federation_up]} {
	skip "This test can't be run without all clusters up"
}

# Remove any existing federation of the same name before starting
cleanup

# Add the clusters to the federation
if {[setup_federation $fed_name]} {
	fail "Failed to setup federation"
}

# Load cluster/federation info from the database into the clusters array
array set clusters [get_clusterfed_info $fed_name]

log_info "Verify federation ids are returned when cluster is part of federation"
# Check the clusters in order and stop at the first one that fails
foreach fed_cluster [list $fedc1 $fedc2 $fedc3] {
	if {[test_fed_job_id $fed_cluster [dict get $clusters($fed_cluster) id]]} {
		fail "Federation id was not returned"
		break
	}
}

# Verify that after clusters are removed from federation that they give normal
# job ids again.
if {[delete_federations $fed_name]} {
	fail "Failed delete federation ($fed_name)"
}
sleep 2

log_info "Verify local ids after being removed from federation"
# Same order and stop-at-first-failure behaviour as the federated check
foreach fed_cluster [list $fedc1 $fedc2 $fedc3] {
	if {[test_loc_job_id $fed_cluster]} {
		fail "Local ids could not be verified"
		break
	}
}