blob: 01ac058f3f5e4df5f36ff5bf1368c65db7011628 [file] [log] [blame]
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "backfill.h"
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
#include "src/common/hostlist.h"
#include "src/common/job_features.h"
#include "src/common/log.h"
#include "src/common/macros.h"
#include "src/common/read_config.h"
#include "src/common/timers.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/interfaces/burst_buffer.h"
#include "src/interfaces/jobcomp.h"
#include "src/interfaces/node_features.h"
#include "src/interfaces/switch.h"
#include "src/interfaces/topology.h"
#include "src/slurmctld/locks.h"
#include "src/slurmctld/slurmctld.h"
#include <check.h>
#ifndef HAVE_FRONT_END
/*
 * File-scope state defined by this test program.
 *
 * NOTE(review): these look like the globals that slurmctld itself normally
 * defines and that the scheduler code under test references by name; they
 * appear to be defined here so the test binary links without the daemon.
 * Confirm against the build rules.
 */
diag_stats_t slurmctld_diag_stats;
void *acct_db_conn = NULL;
uint32_t cluster_cpus = NO_VAL;
/* Job list; created in main() and filled by __add_job(). */
list_t *job_list = NULL;
list_t *resume_job_list = NULL;
time_t last_job_update = (time_t) 0;
time_t last_part_update = (time_t) 0;
time_t last_node_update = (time_t) 0;
time_t last_resv_update = (time_t) 0;
/* Element count of the per-job TRES arrays allocated in __add_job(). */
int slurmctld_tres_cnt = 4;
uint16_t accounting_enforce = 0;
int active_node_record_count;
slurm_conf_t slurm_conf;
node_record_t **node_record_table_ptr;
/* Partition list; main() creates it and appends the single "test" partition. */
list_t *part_list;
/* Reservation list; main() creates it and leaves it empty. */
list_t *resv_list = NULL;
int node_record_count;
slurmctld_config_t slurmctld_config;
uint32_t max_powered_nodes = NO_VAL;
bool preempt_send_user_signal = false;
int sched_interval = 60;
int batch_sched_delay = 3;
bool disable_remote_singleton = false;
int max_depend_depth = 10;
bool cloud_dns = false;
uint32_t validate_resv_cnt = 0;
/*
 * Node bitmaps. All start out NULL; main() allocates most of them once
 * node_record_count is known (cloud_node_bitmap and future_node_bitmap are
 * not allocated anywhere in this file).
 */
bitstr_t *asap_node_bitmap = NULL; /* bitmap of rebooting asap nodes */
bitstr_t *avail_node_bitmap = NULL; /* bitmap of available nodes */
bitstr_t *bf_ignore_node_bitmap = NULL; /* bitmap of nodes to ignore during a
* backfill cycle */
bitstr_t *booting_node_bitmap = NULL; /* bitmap of booting nodes */
bitstr_t *cg_node_bitmap = NULL; /* bitmap of completing nodes */
bitstr_t *cloud_node_bitmap = NULL; /* bitmap of cloud nodes */
bitstr_t *external_node_bitmap = NULL; /* bitmap of external nodes */
bitstr_t *future_node_bitmap = NULL; /* bitmap of FUTURE nodes */
bitstr_t *idle_node_bitmap = NULL; /* bitmap of idle nodes */
bitstr_t *power_down_node_bitmap = NULL; /* bitmap of powered down nodes */
bitstr_t *rs_node_bitmap = NULL; /* bitmap of resuming nodes */
bitstr_t *share_node_bitmap = NULL; /* bitmap of sharable nodes */
bitstr_t *up_node_bitmap = NULL; /* bitmap of non-down nodes */
bitstr_t *power_up_node_bitmap = NULL; /* bitmap of power_up requested nodes */
pthread_mutex_t check_bf_running_lock = PTHREAD_MUTEX_INITIALIZER;
/* Command-line options accepted by backfill-test (see _set_options()). */
typedef struct {
char *testcases; /* -t: path to a file containing test cases */
char *configdir; /* -c: directory that holds slurm.conf */
} backfilltest_opts_t;
/* Parsed options: filled by _set_options(), released by _free_options(). */
static backfilltest_opts_t params;
/* Forward declarations of helpers defined further down in this file. */
static void _free_options(void);
static void _help_msg(void);
static int _set_options(const int argc, char **argv);
static void _init_opts(void)
{
memset(&params, 0, sizeof(backfilltest_opts_t));
}
/*
 * _set_options()
 *
 * Parse the command line into the file-scope "params" structure.
 *
 *   -c <dir>   directory holding slurm.conf (stored in params.configdir)
 *   -t <file>  test case file (stored in params.testcases)
 *   -U         print the usage text and exit
 *
 * IN argc, argv - command line as handed to main()
 * RET 0. The process exits with status 1 on -U and on any option that
 *     getopt() rejects.
 */
static int _set_options(int argc, char **argv)
{
	int c;

	_init_opts();

	/* POSIX getopt() reports the end of the options with -1. */
	while ((c = getopt(argc, argv, "c:t:U")) != -1) {
		switch (c) {
		case 'c':
			/* Release the copy made for an earlier -c, if any. */
			xfree(params.configdir);
			params.configdir = xstrdup(optarg);
			break;
		case 't':
			/* Release the copy made for an earlier -t, if any. */
			xfree(params.testcases);
			params.testcases = xstrdup(optarg);
			break;
		case 'U':
			_help_msg();
			exit(1);
		default:
			exit(1);
		}
	}

	return 0;
}
/*
 * _check_params()
 *
 * Export SLURM_CONF (overwriting any existing value) so that it names the
 * slurm.conf to use. The directory is taken from, in order of preference:
 *   1. the -c command line option,
 *   2. the "srcdir" environment variable,
 *   3. nothing, i.e. a bare "slurm.conf" relative path.
 *
 * RET always 0
 */
static int _check_params(void)
{
	const char *env_dir = getenv("srcdir");
	const char *conf_dir = params.configdir ? params.configdir : env_dir;
	char *slurm_conf_path;

	if (conf_dir)
		slurm_conf_path = xstrdup_printf("%s/slurm.conf", conf_dir);
	else
		slurm_conf_path = xstrdup("slurm.conf");

	setenv("SLURM_CONF", slurm_conf_path, 1);
	xfree(slurm_conf_path);

	return 0;
}
/*
 * _help_msg()
 *
 * Emit the usage text through info().
 */
static void _help_msg(void)
{
	static const char usage[] =
		"Usage backfill-test [<OPTION>]\n"
		"\n"
		"Valid <OPTION> values are:\n"
		" -c Path to a directory with slurm config files.\n"
		" -t Path to a file containing test cases.\n"
		" -U Display brief usage message\n"
		"backfill-test can run in two modes:pre-set libcheck tests or\n"
		"as a backfill emulator when the '-t' option is used.\n";

	info("%s", usage);
}
/*
 * _free_options()
 *
 * Release the option strings stored in the file-scope "params" structure.
 */
static void _free_options(void)
{
	xfree(params.configdir);
	xfree(params.testcases);
}
/*
 * _list_delete_job()
 *
 * Delete function registered for job_list. Only the job record itself is
 * released; whatever the record points to is deliberately left allocated,
 * which is acceptable for a short-lived test program.
 *
 * IN job_entry - job_record_t pointer stored in the list
 */
static void _list_delete_job(void *job_entry)
{
	job_record_t *doomed = job_entry;

	xfree(doomed);
}
/*
 * _print_job()
 *
 * list_for_each() callback that prints a one-line summary of a job to
 * stdout:
 *   - pending job with sched_nodes set: the planned nodes and the start time
 *     relative to *arg,
 *   - running job: the allocated nodes and the end time relative to *arg,
 *   - anything else: " no planned".
 * In both detailed cases the fragmentation value is computed on the
 * complement of the job's node set (the bitmap is inverted before it is
 * handed to topology_g_get_fragmentation()).
 *
 * IN x   - job_record_t pointer
 * IN arg - pointer to a uint32_t reference timestamp
 * RET always 0
 */
static int _print_job(void *x, void *arg)
{
	job_record_t *job_ptr = x;
	bitstr_t *tmp_bitmap = NULL;
	uint32_t *now = arg;

	printf("Job_Id=%u %s ", job_ptr->job_id,
	       job_state_string(job_ptr->job_state));

	if (job_ptr->sched_nodes && IS_JOB_PENDING(job_ptr)) {
		node_name2bitmap(job_ptr->sched_nodes, false, &tmp_bitmap,
				 NULL);
		bit_not(tmp_bitmap);
		/* time_t is not guaranteed to be long: cast to match %ld */
		printf("planned on %s start_time:+%ld fragmentation:%u\n",
		       job_ptr->sched_nodes,
		       (long) (job_ptr->start_time - *now),
		       topology_g_get_fragmentation(tmp_bitmap));
		FREE_NULL_BITMAP(tmp_bitmap);
	} else if (IS_JOB_RUNNING(job_ptr)) {
		char *tmp_str = bitmap2node_name(job_ptr->node_bitmap);

		tmp_bitmap = bit_copy(job_ptr->node_bitmap);
		bit_not(tmp_bitmap);
		printf("on %s end_time:+%ld fragmentation:%u\n",
		       tmp_str, (long) (job_ptr->end_time - *now),
		       topology_g_get_fragmentation(tmp_bitmap));
		FREE_NULL_BITMAP(tmp_bitmap);
		/* bitmap2node_name() hands back memory the caller owns */
		xfree(tmp_str);
	} else {
		printf(" no planned\n");
	}
	fflush(stdout);

	return 0;
}
/*
 * __add_job()
 *
 * Build a job record for the "test" partition and append it to job_list.
 *
 * IN job_id     - id to assign, or 0 to use the next id after the highest
 *                 one handed out or seen so far
 * IN priority   - job priority
 * IN nodes      - used as min/max node count, task count and minimum CPU
 *                 count
 * IN time_limit - stored in the job's time_limit field
 * IN licenses   - license request string, or NULL for none
 * RET the new job record (also reachable through job_list)
 */
job_record_t *__add_job(uint32_t job_id, uint32_t priority, uint32_t nodes,
			uint32_t time_limit, char *licenses)
{
	static uint32_t last_job_id = 0;
	job_record_t *job;

	/* Track the highest id so automatically assigned ids never collide. */
	if (job_id)
		last_job_id = MAX(last_job_id, job_id);
	else
		job_id = ++last_job_id;

	job = job_record_create();

	/* Identity and scheduling attributes */
	job->job_id = job_id;
	job->priority = priority;
	job->time_limit = time_limit;
	job->state_reason = WAIT_NO_REASON;
	job->best_switch = true;
	job->partition = xstrdup("test");
	job->part_ptr = find_part_record("test");

	/* Resource request: one task and one CPU per requested node */
	job->details->min_nodes = nodes;
	job->details->max_nodes = nodes;
	job->details->num_tasks = nodes;
	job->details->min_cpus = nodes;
	job->details->max_cpus = NO_VAL;
	job->details->cpus_per_task = 1;
	job->details->task_dist = SLURM_DIST_CYCLIC;
	job->details->share_res = 1;
	job->details->whole_node = 0;

	/* TRES bookkeeping arrays, slurmctld_tres_cnt entries each */
	job->limit_set.tres = xcalloc(slurmctld_tres_cnt, sizeof(uint16_t));
	job->tres_req_cnt = xcalloc(slurmctld_tres_cnt, sizeof(uint64_t));
	job->tres_req_cnt[TRES_ARRAY_CPU] = job->details->min_cpus;
	job->tres_req_cnt[TRES_ARRAY_MEM] = 1;
	job->tres_req_cnt[TRES_ARRAY_NODE] = job->details->min_nodes;

	if (licenses) {
		/* The validity flag is collected but not acted upon. */
		bool valid = true;

		job->license_list = license_validate(licenses, true, true,
						     true, NULL, &valid);
		job->licenses = xstrdup(licenses);
	}

	list_append(job_list, job);

	return job;
}
void load_test()
{
char buffer[256];
FILE *f = fopen(params.testcases, "r");
if (f == NULL) {
return;
}
while (fgets(buffer, 256, f)) {
char *p;
uint32_t job_id, priority, nodes, time_limit;
if ((buffer[0] == '#') || (buffer[0] == '\n'))
continue;
job_id = strtoul(buffer, &p, 10);
priority = strtoul(p, &p, 10);
nodes = strtoul(p, &p, 10);
time_limit = strtoul(p, &p, 10);
__add_job(job_id, priority, nodes, time_limit, p);
}
fclose(f);
}
/*
 * Test simple backfill situation
 *
 * 1st job uses 1 node
 * 2nd job requests all nodes and can't run.
 * 3rd job requests remaining 31 nodes and gets backfilled.
 */
START_TEST(test_backfill_1)
{
	job_record_t *job_ptr;
	uint32_t now = time(NULL);

	/* job_id, priority, nodes, time_limit, licenses */
	__add_job(0, 10, 1, 10, NULL);
	__add_job(0, 5, 32, 10, NULL);
	__add_job(0, 1, 31, 5, NULL);

	__attempt_backfill();
	list_for_each(job_list, _print_job, &now);

	/*
	 * Each lookup is checked before the state macros dereference it, so
	 * a missing job fails the test with a message instead of crashing.
	 */
	job_ptr = find_job_record(1);
	ck_assert_msg(job_ptr != NULL, "Job 1 not found");
	ck_assert_msg(IS_JOB_RUNNING(job_ptr), "Job 1 RUNNING");

	job_ptr = find_job_record(2);
	ck_assert_msg(job_ptr != NULL, "Job 2 not found");
	ck_assert_msg(!IS_JOB_RUNNING(job_ptr), "Job 2 !RUNNING");

	job_ptr = find_job_record(3);
	ck_assert_msg(job_ptr != NULL, "Job 3 not found");
	ck_assert_msg(IS_JOB_RUNNING(job_ptr), "Job 3 RUNNING");
}
END_TEST
/*
 * Test for starving jobs described in scenario in Ticket 20847
 *
 * Bigger, lower priority jobs were jumping ahead of smaller, higher priority
 * jobs, and the higher priority jobs had no start time.
 *
 * After one backfill pass every ordered pair of jobs 1..8 is inspected: a
 * job that has both a higher priority and a node count no larger than
 * another job must not be left without a start time, or with a later one,
 * when that other job was given a start time.
 */
START_TEST(test_backfill_2)
{
	uint32_t now = time(NULL);

	/* job_id, priority, nodes, time_limit, licenses */
	__add_job(0, 10, 6, 10, NULL);
	__add_job(0, 9, 27, 15, NULL);
	__add_job(0, 8, 28, 14, NULL);
	__add_job(0, 7, 29, 13, NULL);
	__add_job(0, 6, 30, 12, NULL);
	__add_job(0, 5, 5, 10, NULL);
	__add_job(0, 5, 5, 10, NULL);
	/* This job would jump ahead of the priority 6 job */
	__add_job(0, 1, 30, 11, NULL);

	__attempt_backfill();
	list_for_each(job_list, _print_job, &now);

	for (int id_a = 1; id_a <= 8; id_a++) {
		for (int id_b = 1; id_b <= 8; id_b++) {
			job_record_t *favored = find_job_record(id_a);
			job_record_t *other = find_job_record(id_b);

			if (!favored || !other)
				continue;
			/* Only pairs where "favored" outranks "other" ... */
			if (favored->priority <= other->priority)
				continue;
			/* ... and does not ask for more nodes ... */
			if (favored->details->min_nodes >
			    other->details->min_nodes)
				continue;
			/* ... and "other" was actually planned. */
			if (!other->start_time)
				continue;
			/* Fine when "favored" is planned no later. */
			if (favored->start_time &&
			    (favored->start_time <= other->start_time))
				continue;

			ck_abort_msg("Wrong backfill order");
		}
	}
}
END_TEST
/*
 * Test BF_MAX_JOB_TEST (default=500)
 *
 * Submit 1000 jobs.
 * Check if 500 jobs are backfilled.
 * 501st job shouldn't have a start time.
 */
START_TEST(test_backfill_3)
{
	job_record_t *job1_ptr;
	job_record_t *job2_ptr;

	for (int i = 0; i < 1000; i++) {
		/* job_id, priority, nodes, time_limit, licenses */
		__add_job(0, 10, 6, 10, NULL);
	}

	__attempt_backfill();

	job1_ptr = find_job_record(500);
	job2_ptr = find_job_record(501);

	/* Fail with a message instead of dereferencing a missing record. */
	ck_assert_msg(job1_ptr != NULL, "Job 500 not found");
	ck_assert_msg(job2_ptr != NULL, "Job 501 not found");

	ck_assert_msg((job1_ptr->start_time && !job2_ptr->start_time),
		      "Completed testing 500 (bf_max_job_test) jobs");

	/*
	 * To dump every job while debugging:
	 * uint32_t now = time(NULL);
	 * list_for_each(job_list, _print_job, &now);
	 */
}
END_TEST
/*
 * Test the simplest backfilling of licenses
 *
 * Jobs 1-3 all request "lic1" and job 4 requests no license. Job 1 and
 * job 4 are expected to run, while jobs 2 and 3 are expected not to.
 */
START_TEST(test_backfill_lic_1)
{
	uint32_t now = time(NULL);
	job_record_t *job_ptr;

	/* job_id, priority, nodes, time_limit, licenses */
	__add_job(1, 10, 1, 10, "lic1");
	__add_job(2, 9, 1, 10, "lic1");
	__add_job(3, 8, 1, 10, "lic1");
	__add_job(4, 7, 1, 10, NULL);

	__attempt_backfill();
	list_for_each(job_list, _print_job, &now);

	/*
	 * Each lookup is checked before the state macros dereference it, so
	 * a missing job fails the test with a message instead of crashing.
	 */
	job_ptr = find_job_record(1);
	ck_assert_msg(job_ptr != NULL, "Job 1 not found");
	ck_assert_msg(IS_JOB_RUNNING(job_ptr), "Job 1 RUNNING");

	job_ptr = find_job_record(2);
	ck_assert_msg(job_ptr != NULL, "Job 2 not found");
	ck_assert_msg(!IS_JOB_RUNNING(job_ptr), "Job 2 !RUNNING");

	job_ptr = find_job_record(3);
	ck_assert_msg(job_ptr != NULL, "Job 3 not found");
	ck_assert_msg(!IS_JOB_RUNNING(job_ptr), "Job 3 !RUNNING");

	job_ptr = find_job_record(4);
	ck_assert_msg(job_ptr != NULL, "Job 4 not found");
	ck_assert_msg(IS_JOB_RUNNING(job_ptr), "Job 4 RUNNING");
}
END_TEST
/*
 * Test for wrong start_time scenario in Issue 50271
 *
 * Twelve identical single-node jobs requesting "lic2" are submitted to the
 * "test" partition with max_share forced to 1. Jobs 7 and 12 must end up
 * with the same start_time.
 */
START_TEST(test_backfill_lic_2)
{
	uint32_t now = time(NULL);
	job_record_t *job1_ptr, *job2_ptr;
	part_record_t *part_ptr = find_part_record("test");

	/* Fail with a message instead of dereferencing a missing record. */
	ck_assert_msg(part_ptr != NULL, "Partition test not found");
	part_ptr->max_share = 1;

	for (int i = 0; i < 12; i++) {
		/* job_id, priority, nodes, time_limit, licenses */
		__add_job(0, 10, 1, 10, "lic2");
	}

	__attempt_backfill();
	list_for_each(job_list, _print_job, &now);

	job1_ptr = find_job_record(7);
	job2_ptr = find_job_record(12);
	ck_assert_msg(job1_ptr != NULL, "Job 7 not found");
	ck_assert_msg(job2_ptr != NULL, "Job 12 not found");

	if (job1_ptr->start_time != job2_ptr->start_time)
		ck_abort_msg("Wrong start_time");
}
END_TEST
#endif
/*
 * main()
 *
 * Set up logging, configuration, the node/partition tables and the global
 * bitmaps and lists, then run in one of two modes:
 *   - no -t option: run the libcheck suite made of the test_backfill_* cases;
 *   - with -t:      load jobs from the test-case file, run a single
 *                   __attempt_backfill() pass and print every job.
 *
 * When HAVE_FRONT_END is defined the whole body is compiled out.
 *
 * RET EXIT_SUCCESS when no libcheck test failed (number_failed is never
 *     changed in -t mode or when the body is compiled out), EXIT_FAILURE
 *     otherwise.
 */
int main(int argc, char *argv[])
{
int number_failed = 0;
#ifndef HAVE_FRONT_END
part_record_t *part_ptr = part_record_create();
/* Log to stderr at info level. */
log_options_t log_opts = LOG_OPTS_INITIALIZER;
log_opts.stderr_level = LOG_LEVEL_INFO;
log_init("backfill-test", log_opts, 0, NULL);
/*
 * Parse options, then export SLURM_CONF before slurm_init() runs.
 * NOTE(review): presumably slurm_init() reads the file SLURM_CONF names -
 * confirm. The return values of both helpers are ignored; both always
 * return 0.
 */
_set_options(argc, argv);
_check_params();
slurm_init(NULL);
select_g_init();
init_node_conf();
build_all_nodeline_info(true, 0);
switch_g_init(true);
topology_g_init();
topology_g_build_config();
/*
 * Allocate a bitmap sized to the node table and invert it.
 * NOTE(review): presumably bit_alloc() returns a cleared bitmap, so this
 * marks every node as available - confirm.
 */
avail_node_bitmap = bit_alloc(node_record_count);
bit_not(avail_node_bitmap);
/* Single partition "test" whose node bitmap is a copy of avail_node_bitmap. */
part_list = list_create(NULL);
part_ptr->name = xstrdup("test");
part_ptr->node_bitmap = bit_copy(avail_node_bitmap);
part_ptr->max_share = 0;
list_append(part_list, part_ptr);
select_g_node_init();
node_features_g_init();
jobcomp_g_init();
/* Bitmaps that are freshly allocated and not inverted. */
asap_node_bitmap = bit_alloc(node_record_count);
rs_node_bitmap = bit_alloc(node_record_count);
cg_node_bitmap = bit_alloc(node_record_count);
external_node_bitmap = bit_alloc(node_record_count);
power_down_node_bitmap = bit_alloc(node_record_count);
booting_node_bitmap = bit_alloc(node_record_count);
bf_ignore_node_bitmap = bit_alloc(node_record_count);
/* Bitmaps that start as copies of avail_node_bitmap. */
up_node_bitmap = bit_copy(avail_node_bitmap);
share_node_bitmap = bit_copy(avail_node_bitmap);
idle_node_bitmap = bit_copy(avail_node_bitmap);
power_up_node_bitmap = bit_copy(avail_node_bitmap);
/* Job list entries are released through _list_delete_job(). */
job_list = list_create(_list_delete_job);
resv_list = list_create(NULL);
license_init(slurm_conf.licenses);
select_g_reconfigure();
if (!params.testcases) {
/* Unit-test mode: one suite, one test case holding all five tests. */
Suite *s = suite_create("backfill");
SRunner *sr = srunner_create(s);
TCase *tc = tcase_create("backfill");
tcase_set_timeout(tc, 10);
tcase_add_test(tc, test_backfill_1);
tcase_add_test(tc, test_backfill_2);
tcase_add_test(tc, test_backfill_3);
tcase_add_test(tc, test_backfill_lic_1);
tcase_add_test(tc, test_backfill_lic_2);
suite_add_tcase(s, tc);
srunner_run_all(sr, CK_ENV);
number_failed += srunner_ntests_failed(sr);
srunner_free(sr);
} else {
/* Emulator mode: jobs come from the -t file; results go to stdout. */
uint32_t now;
load_test();
now = time(NULL);
__attempt_backfill();
list_for_each(job_list, _print_job, &now);
}
_free_options();
#endif
return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}