Merge branch 't21444-Stop_inval_read_arb_dist' into 'master'

See merge request SchedMD/dev/slurm!266
diff --git a/src/common/job_record.c b/src/common/job_record.c
index 5e24654..dc28d44 100644
--- a/src/common/job_record.c
+++ b/src/common/job_record.c
@@ -1371,7 +1371,8 @@
 	return slurm_sort_int_list_asc(&node1->node_index, &node2->node_index);
 }
 
-extern int job_record_calc_arbitrary_tpn(job_record_t *job_ptr)
+extern int job_record_calc_arbitrary_tpn(job_record_t *job_ptr,
+					 uint16_t protocol_version)
 {
 	uint16_t *arbitrary_tasks_np = NULL;
 	int rc = SLURM_SUCCESS;
@@ -1380,6 +1381,7 @@
 	char *host, *prev_host = NULL;
 	node_inx_cnt_t *node_inx_cnts;
 	hostlist_t *hl = hostlist_create(job_ptr->details->req_nodes);
+	int num_names = hostlist_count(hl);
 	hostlist_sort(hl);
 
 	arbitrary_tasks_np = xcalloc(num_nodes, sizeof(uint16_t));
@@ -1417,6 +1419,33 @@
 		goto cleanup;
 	}
 
+	if (num_names != job_ptr->details->num_tasks) {
+		error("Task count (%u) for %pJ is not equal to the number of nodes in the requested arbitrary node list (%s)",
+		      job_ptr->details->num_tasks, job_ptr,
+		      job_ptr->details->req_nodes);
+
+		/* Reject arbitrary jobs with bad task counts in 25.11+. */
+		if (protocol_version >= SLURM_25_11_PROTOCOL_VERSION) {
+			free(prev_host);
+			rc = ESLURM_BAD_TASK_COUNT;
+			goto cleanup;
+		}
+
+		/*
+		 * Allow jobs saved by an older version to be loaded from
+		 * saved state. This prevents all jobs from being lost during
+		 * an upgrade. Reallocate the arbitrary_tasks_np array to
+		 * prevent an invalid read if the number of tasks > num_names.
+		 * This workaround, along with this function's
+		 * protocol_version parameter, can be removed once upgrading
+		 * from SLURM_25_05_PROTOCOL_VERSION is no longer supported.
+		 */
+		if (num_names < job_ptr->details->num_tasks)
+			xrecalloc(arbitrary_tasks_np,
+				  job_ptr->details->num_tasks,
+				  sizeof(arbitrary_tasks_np[0]));
+	}
+
 	node_inx_cnts[cur_node].node_index = node_name_get_inx(prev_host);
 	free(prev_host);
 
@@ -1846,7 +1875,8 @@
 	job_ptr->details->work_dir = work_dir;
 
 	if (((job_ptr->details->task_dist & SLURM_DIST_STATE_BASE) ==
-	     SLURM_DIST_ARBITRARY) && job_record_calc_arbitrary_tpn(job_ptr))
+	     SLURM_DIST_ARBITRARY) &&
+	    job_record_calc_arbitrary_tpn(job_ptr, job_ptr->start_protocol_ver))
 		return SLURM_ERROR;
 
 	return SLURM_SUCCESS;
diff --git a/src/common/job_record.h b/src/common/job_record.h
index 52b2bea..3db8030 100644
--- a/src/common/job_record.h
+++ b/src/common/job_record.h
@@ -705,7 +705,8 @@
 extern int load_step_state(job_record_t *job_ptr, buf_t *buffer,
 			   uint16_t protocol_version);
 
-extern int job_record_calc_arbitrary_tpn(job_record_t *job_ptr);
+extern int job_record_calc_arbitrary_tpn(job_record_t *job_ptr,
+					 uint16_t protocol_version);
 
 extern void job_record_pack_details_common(
 	job_details_t *detail_ptr, buf_t *buffer, uint16_t protocol_version);
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index e4dcffb..26866b7 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -8645,12 +8645,14 @@
 	detail_ptr->x11_magic_cookie = xstrdup(job_desc->x11_magic_cookie);
 	detail_ptr->x11_target = xstrdup(job_desc->x11_target);
 	detail_ptr->x11_target_port = job_desc->x11_target_port;
+	if (job_desc->num_tasks != NO_VAL)
+		detail_ptr->num_tasks = job_desc->num_tasks;
 	if (job_desc->req_nodes) {
 		if ((job_desc->task_dist & SLURM_DIST_STATE_BASE) ==
 		    SLURM_DIST_ARBITRARY) {
 			detail_ptr->req_nodes = xstrdup(job_desc->req_nodes);
-			if ((error_code =
-			     job_record_calc_arbitrary_tpn(job_ptr)))
+			if ((error_code = job_record_calc_arbitrary_tpn(
+				job_ptr, SLURM_PROTOCOL_VERSION)))
 				return error_code;
 		} else {
 			detail_ptr->req_nodes =
@@ -8716,8 +8718,6 @@
 		detail_ptr->pn_min_cpus = job_desc->pn_min_cpus;
 	if (job_desc->overcommit != NO_VAL8)
 		detail_ptr->overcommit = job_desc->overcommit;
-	if (job_desc->num_tasks != NO_VAL)
-		detail_ptr->num_tasks = job_desc->num_tasks;
 	if (job_desc->ntasks_per_node != NO_VAL16) {
 		detail_ptr->ntasks_per_node = job_desc->ntasks_per_node;
 		if ((detail_ptr->overcommit == 0) &&