Merge branch 'broderick/rest-power-down-asap' into 'master'

See merge request SchedMD/dev/slurm!2702
diff --git a/doc/html/Makefile.am b/doc/html/Makefile.am
index 17f2bca..1517c44 100644
--- a/doc/html/Makefile.am
+++ b/doc/html/Makefile.am
@@ -36,7 +36,6 @@
 	hres.html \
 	jobcomp_kafka.html \
 	job_array.html \
-	job_container_tmpfs.html \
 	job_exit_code.html \
 	job_launch.html \
 	job_reason_codes.html \
@@ -53,6 +52,7 @@
 	metrics.html \
 	mpi_guide.html \
 	multi_cluster.html \
+	namespace.html \
 	network.html \
 	nss_slurm.html \
 	openapi_release_notes.html \
diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in
index 4ac3600..99d2197 100644
--- a/doc/html/Makefile.in
+++ b/doc/html/Makefile.in
@@ -496,7 +496,6 @@
 	hres.html \
 	jobcomp_kafka.html \
 	job_array.html \
-	job_container_tmpfs.html \
 	job_exit_code.html \
 	job_launch.html \
 	job_reason_codes.html \
@@ -513,6 +512,7 @@
 	metrics.html \
 	mpi_guide.html \
 	multi_cluster.html \
+	namespace.html \
 	network.html \
 	nss_slurm.html \
 	openapi_release_notes.html \
diff --git a/doc/html/configless_slurm.shtml b/doc/html/configless_slurm.shtml
index 3ae8a51..95fe758 100644
--- a/doc/html/configless_slurm.shtml
+++ b/doc/html/configless_slurm.shtml
@@ -153,6 +153,7 @@
 <li>helpers.conf</li>
 <li>job_container.conf</li>
 <li>mpi.conf</li>
+<li>namespace.yaml</li>
 <li>oci.conf</li>
 <li>plugstack.conf</li>
 <li>scrun.lua</li>
diff --git a/doc/html/documentation.shtml b/doc/html/documentation.shtml
index acda536..e02fe10 100644
--- a/doc/html/documentation.shtml
+++ b/doc/html/documentation.shtml
@@ -45,7 +45,7 @@
 <li><a href="dynamic_nodes.html">Dynamic Nodes</a></li>
 <li><a href="elasticsearch.html">Elasticsearch Guide</a></li>
 <li><a href="jobcomp_kafka.html">Job Completion Kafka plugin Guide</a></li>
-<li><a href="job_container_tmpfs.html">job_container/tmpfs - Job Specific Temporary File Management</a></li>
+<li><a href="namespace_tmpfs.html">namespace/tmpfs - Job Specific Temporary File Management</a></li>
 <li><a href="jwt.html">JSON Web Tokens Authentication</a></li>
 <li><a href="federation.html">Federated Scheduling Guide</a></li>
 <li><a href="pam_slurm_adopt.html">Job Containment (SSH Session Control) with pam_slurm_adopt</a></li>
diff --git a/doc/html/job_container_tmpfs.shtml b/doc/html/job_container_tmpfs.shtml
deleted file mode 100644
index 4e18c70..0000000
--- a/doc/html/job_container_tmpfs.shtml
+++ /dev/null
@@ -1,68 +0,0 @@
-<!--#include virtual="header.txt"-->
-
-<h1>job_container/tmpfs</h1>
-
-<h2 id="overview">Overview<a class="slurm_link" href="#overview"></a></h2>
-<p>job_container/tmpfs is an optional plugin that provides job-specific, private
-temporary file system space.</p>
-
-<p>When enabled on the cluster, a filesystem namespace will be created for each
-job with a unique, private instance of /tmp and /dev/shm for the job to use.
-These directories can be changed with the <b>Dirs=</b> option in
-job_container.conf. The contents of these directories will be removed at job
-termination.</p>
-
-<h2 id="installation">Installation
-<a class="slurm_link" href="#installation"></a>
-</h2>
-<p>This plugin is built and installed as part of the default build, no extra
-installation steps are required.</p>
-
-<h2 id="setup">Setup<a class="slurm_link" href="#setup"></a></h2>
-<p>Slurm must be configured to load the job container plugin by adding
-<b>JobContainerType=job_container/tmpfs</b> and <b>PrologFlags=contain</b> in
-slurm.conf. Additional configuration must be done in the job_container.conf
-file, which should be placed in the same directory as slurm.conf.</p>
-
-<p>Job containers can be configured for all nodes, or for a subset of nodes.
-As an example, if all nodes will be configured the same way, you would put the
-following in your job_container.conf:</p>
-
-<pre>
-AutoBasePath=true
-BasePath=/var/nvme/storage
-</pre>
-
-<p>A full description of the parameters available in the job_container.conf
-file can be found <a href="job_container.conf.html">here</a>.</p>
-
-<h2 id="initial_testing">Initial Testing
-<a class="slurm_link" href="#initial_testing"></a>
-</h2>
-<p>An easy way to verify that the container is working is to run a job and
-ensure that the /tmp directory is empty (since it normally has some other
-files) and that "." is owned by the user that submitted the job.</p>
-<pre>
-tim@slurm-ctld:~$ srun ls -al /tmp
-total 8
-drwx------  2 tim    root 4096 Feb 10 17:14 .
-drwxr-xr-x 21 root   root 4096 Nov 15 08:46 ..
-</pre>
-
-<p>While a job is running, root should be able to confirm that
-<code>/$BasePath/$JobID/_tmp</code> exists and is empty. This directory is bind
-mounted into the job. <code>/$BasePath/$JobID</code> should be owned by root,
-and is not intended to be accessible to the user.</p>
-
-<h2 id="spank">SPANK<a class="slurm_link" href="#spank"></a></h2>
-<p>This plugin interfaces with the SPANK api, and automatically joins the job's
-container in the following functions:</p>
-<ul>
-<li>spank_task_init_privileged()</li>
-<li>spank_task_init()</li>
-</ul>
-
-<p>In addition to the job itself, the TaskProlog will also be executed inside
-the container.</p>
-
-<!--#include virtual="footer.txt"-->
diff --git a/doc/html/namespace.shtml b/doc/html/namespace.shtml
new file mode 100644
index 0000000..caa5224
--- /dev/null
+++ b/doc/html/namespace.shtml
@@ -0,0 +1,109 @@
+<!--#include virtual="header.txt"-->
+
+<h1>Namespace Plugins</h1>
+
+<h2 id="overview">Overview<a class="slurm_link" href="#overview"></a></h2>
+
+<p>A namespace plugin can be enabled to provide job-specific, private temporary
+file system space.</p>
+
+<p>When enabled on the cluster, a filesystem namespace will be created for each
+job with a unique, private instance of /tmp and /dev/shm for the job to use.
+These directories can be changed with the <b>Dirs=</b> option in the
+plugin-specific configuration file. The contents of these directories will be
+removed at job termination. Additionally, the <b>namespace/linux</b> plugin can
+be configured to create new PID and user namespaces.</p>
+
+<h2 id="installation">Installation
+<a class="slurm_link" href="#installation"></a>
+</h2>
+
+<p>These plugins are built and installed as part of the default build, no extra
+installation steps are required.</p>
+
+<h2 id="setup">Setup<a class="slurm_link" href="#setup"></a></h2>
+
+<p>Slurm must be configured to load the namespace plugin by adding
+<b>PrologFlags=contain</b> and setting <b>NamespaceType</b> to the desired
+plugin in slurm.conf. Additional configuration must be done in the
+plugin-specific configuration file, which should be placed in the same
+directory as slurm.conf.</p>
+
+<h3 id="linux_setup">namespace/linux plugin
+<a class="slurm_link" href="#linux_setup"></a>
+</h3>
+
+<p>The <b>namespace/linux</b> plugin (added 25.11) uses the configuration file
+<a href="namespace.yaml.html">namespace.yaml</a>. This plugin can be configured
+to create user and PID namespaces in addition to a temporary filesystem
+namespace. Namespaces can be configured for all nodes or for a subset of nodes.
+As an example, if all nodes will be configured the same way, you could put the
+following in your namespace.yaml:</p>
+
+<pre>
+defaults:
+  auto_base_path: true
+  base_path: "/var/nvme/storage"
+</pre>
+
+<p><b>Note</b> the following important details with this plugin:</p>
+
+<ul>
+<li>This plugin requires <b>cgroup/v2</b> to operate correctly.</li>
+<li>When using user namespaces, bpf token support (added in kernel 6.9) is
+  required to use <a href="cgroup.conf.html#OPT_ConstrainDevices">
+  ConstrainDevices</a> in <b>cgroup.conf</b>.</li>
+</ul>
+
+<h3 id="tmpfs_setup">namespace/tmpfs plugin
+<a class="slurm_link" href="#tmpfs_setup"></a>
+</h3>
+
+<p>The <b>namespace/tmpfs</b> plugin (formerly job_container/tmpfs) uses the
+configuration file <a href="job_container.conf.html">job_container.conf</a>.
+Namespaces can be configured for all nodes, or for a subset of nodes. As an
+example, if all nodes will be configured the same way, you could put the
+following in your job_container.conf:</p>
+
+<pre>
+AutoBasePath=true
+BasePath=/var/nvme/storage
+</pre>
+
+<h2 id="initial_testing">Initial Testing
+<a class="slurm_link" href="#initial_testing"></a>
+</h2>
+
+<p>An easy way to verify that the container is working is to run a job and
+ensure that the /tmp directory is empty (since it normally has some other
+files) and that "." is owned by the user that submitted the job.</p>
+<pre>
+tim@slurm-ctld:~$ srun ls -al /tmp
+total 8
+drwx------  2 tim    root 4096 Feb 10 17:14 .
+drwxr-xr-x 21 root   root 4096 Nov 15 08:46 ..
+</pre>
+
+<p>While a job is running, root should be able to confirm that
+<code>/$BasePath/$JobID/_tmp</code> exists and is empty. This directory is bind
+mounted into the job. <code>/$BasePath/$JobID</code> should be owned by root,
+and is not intended to be accessible to the user.</p>
+
+<p>Additionally, when the <b>Linux</b> plugin is in use, you can confirm that a
+PID namespace is in effect by running a job and running "ps". The only visible
+PIDs should be related to the job and PID 1 will be named <b>slurmstepd:
+[${job_id}.namespace]</b>.</p>
+
+<h2 id="spank">SPANK<a class="slurm_link" href="#spank"></a></h2>
+
+<p>This plugin interfaces with the SPANK api, and automatically joins the job's
+namespace in the following functions:</p>
+<ul>
+<li>spank_task_init_privileged()</li>
+<li>spank_task_init()</li>
+</ul>
+
+<p>In addition to the job itself, the TaskProlog will also be executed inside
+the container.</p>
+
+<!--#include virtual="footer.txt"-->
diff --git a/doc/html/pam_slurm_adopt.shtml b/doc/html/pam_slurm_adopt.shtml
index bcbbd78..e84b838 100644
--- a/doc/html/pam_slurm_adopt.shtml
+++ b/doc/html/pam_slurm_adopt.shtml
@@ -466,7 +466,7 @@
   <b id="join_container">join_container</b>
   <a class="slurm_link" href="#join_container"></a>
 </dt>
-<dd>Control the interaction with the job_container/tmpfs plugin.
+<dd>Control the interaction with the namespace plugins.
 Configurable values are:
 <dl>
 <dt></dt>
@@ -476,12 +476,12 @@
   <b id="join_container_true">true</b> (default)
   <a class="slurm_link" href="#join_container_true"></a>
 </dt>
-<dd>Attempt to join a container created by the job_container/tmpfs plugin.</dd>
+<dd>Attempt to join a namespace created by the namespace plugins.</dd>
 <dt>
   <b id="join_container_false">false</b>
   <a class="slurm_link" href="#join_container_false"></a>
 </dt>
-<dd>Do not attempt to join a container.</dd>
+<dd>Do not attempt to join a namespace.</dd>
 </dl>
 </dd>
 </dl>
@@ -563,7 +563,7 @@
 </pre>
 
 <p>It is possible for some plugins to require more permissions than this.
-Notably, job_container/tmpfs will require something more like this:</p>
+Notably, namespace/tmpfs will require something more like this:</p>
 
 <pre>
 module pam_slurm_adopt 1.0;
@@ -605,4 +605,8 @@
 <p>When using SELinux support in Slurm, the session started via pam_slurm_adopt
 won't necessarily be in the same context as the job it is associated with.</p>
 
+<p>When using namespace/linux and the user namespace is configured, the
+pam_limits module may not be able to set memlock, sigpending, msgqueue, nice, or
+rtprio.</p>
+
 <!--#include virtual="footer.txt"-->
diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1
index 937517c..6798134 100644
--- a/doc/man/man1/sinfo.1
+++ b/doc/man/man1/sinfo.1
@@ -1038,7 +1038,9 @@
 All jobs associated with this node are in the process of
 COMPLETING. This node state will be removed when
 all of the job's processes have terminated and the Slurm
-epilog program (if any) has terminated. See the \fBEpilog\fR
+epilog program (if any) has terminated. While a node is in
+the COMPLETING state, it is not eligible for new job placement
+and no jobs will be scheduled on it. See the \fBEpilog\fR
 parameter description in the \fBslurm.conf\fR(5) man page for
 more information.
 .IP
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 89576e8..fdf1237 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -936,9 +936,11 @@
 
 .TP
 \fBCompleteWait\fR
-The time to wait, in seconds, when any job is in the COMPLETING state
-before any additional jobs are scheduled. This is to attempt to keep jobs on
-nodes that were recently in use, with the goal of preventing fragmentation.
+The time to wait, in seconds, when any job in the cluster is in the
+COMPLETING state before any additional jobs are scheduled. This parameter is
+evaluated at the cluster scheduling level, not per node. This is to attempt to
+keep jobs on nodes that were recently in use, with the goal of preventing
+fragmentation.
 If set to zero, pending jobs will be started as soon as possible.
 Since a COMPLETING job's resources are released for use by other
 jobs as soon as the \fBEpilog\fR completes on each individual node,
@@ -951,6 +953,11 @@
 The default value of \fBCompleteWait\fR is zero seconds.
 The value may not exceed 65533.
 
+This parameter does not override the fact that nodes in the COMPLETING state
+are not eligible for job scheduling. Nodes will not accept new job placement
+until their Epilog completes and they leave the COMPLETING state, even if
+\fBCompleteWait\fR is set to zero.
+
 \fBNOTE\fR: Setting \fBreduce_completing_frag\fR affects the behavior
 of \fBCompleteWait\fR.
 .IP
@@ -1479,6 +1486,11 @@
 By default there is no epilog.
 See \fBProlog and Epilog Scripts\fR for more information.
 
+While the Epilog script is running on a node, that node will be placed in the
+\fBCOMPLETING\fR state. Nodes in the COMPLETING state do not accept new job
+allocations, and no jobs will be scheduled on them until the Epilog has
+finished and the node has returned to an available state.
+
 \fB\fBNOTE\fR: It is possible to configure multiple epilog scripts by including
 this option on multiple lines.\fR
 .IP
@@ -2799,24 +2811,29 @@
 .TP
 \fBNamespaceType\fR
 Identifies the plugin to be used for job isolation through namespaces. To use
-these plugins, 'PrologFlags=Contain' must be set.
+these plugins, 'PrologFlags=Contain' must be set. Refer to the namespace page
+for further details: <https://slurm.schedmd.com/namespace.html>
+
 \fBNOTE\fR: See \fBProctrackType\fR for resource containment and usage tracking.
+
 Acceptable values at present include:
 .IP
 .RS
 .TP 20
-\fBnamespace/tmpfs\fR
-Used to create a private namespace on the filesystem for jobs, which houses
-temporary file systems (/tmp and /dev/shm) for each job.
-.IP
-.TP
 \fBnamespace/linux\fR
 Used to create a private filesystem namespace and optionally user and pid
 namespaces. The filesystem namespace houses temporary filesystems (/tmp and
 /dev/shm) for each job.
+
 \fBNOTE\fR: This plugin requires cgroup/v2 to operate correctly.
+.br
 \fBNOTE\fR: When using user namespaces, bpf token support (added in kernel 6.9)
 is required to use ConstrainDevices in \fBcgroup.conf\fR.
+.IP
+.TP
+\fBnamespace/tmpfs\fR
+Used to create a private namespace on the filesystem for jobs, which houses
+temporary file systems (/tmp and /dev/shm) for each job.
 .RE
 .IP
 
@@ -4543,13 +4560,6 @@
 .IP
 
 .TP
-\fBenable_job_state_cache\fR
-Enables an independent cache of job state details within slurmctld. This allows
-processing of `\fBsqueue\fR \-\-only\-job\-state` and replaced RPCs with minimal
-impact on other slurmctld operations.
-.IP
-
-.TP
 \fBenable_user_top\fR
 Enable use of the "scontrol top" command by non\-privileged users.
 .IP
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index 8356d46..923da94 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -34,6 +34,8 @@
 	env.h					\
 	extra_constraints.c			\
 	extra_constraints.h			\
+	events.c				\
+	events.h				\
 	fd.c					\
 	fd.h					\
 	fetch_config.c				\
diff --git a/src/common/Makefile.in b/src/common/Makefile.in
index 6cf5c1e..cc509a9 100644
--- a/src/common/Makefile.in
+++ b/src/common/Makefile.in
@@ -165,11 +165,11 @@
 am_libcommon_la_OBJECTS = assoc_mgr.lo bitstring.lo callerid.lo \
 	cbuf.lo core_array.lo cpu_frequency.lo cron.lo daemonize.lo \
 	data.lo dynamic_plugin_data.lo eio.lo env.lo \
-	extra_constraints.lo fd.lo fetch_config.lo forward.lo \
-	global_defaults.lo group_cache.lo half_duplex.lo hostlist.lo \
-	http.lo http_con.lo http_mime.lo http_router.lo http_switch.lo \
-	identity.lo id_util.lo io_hdr.lo job_features.lo \
-	job_options.lo job_record.lo job_resources.lo \
+	extra_constraints.lo events.lo fd.lo fetch_config.lo \
+	forward.lo global_defaults.lo group_cache.lo half_duplex.lo \
+	hostlist.lo http.lo http_con.lo http_mime.lo http_router.lo \
+	http_switch.lo identity.lo id_util.lo io_hdr.lo \
+	job_features.lo job_options.lo job_record.lo job_resources.lo \
 	job_state_reason.lo list.lo log.lo msg_type.lo net.lo \
 	node_conf.lo node_features.lo oci_config.lo openapi.lo optz.lo \
 	pack.lo parse_config.lo parse_time.lo parse_value.lo \
@@ -221,33 +221,33 @@
 	./$(DEPDIR)/cpu_frequency.Plo ./$(DEPDIR)/cron.Plo \
 	./$(DEPDIR)/daemonize.Plo ./$(DEPDIR)/data.Plo \
 	./$(DEPDIR)/dynamic_plugin_data.Plo ./$(DEPDIR)/eio.Plo \
-	./$(DEPDIR)/env.Plo ./$(DEPDIR)/extra_constraints.Plo \
-	./$(DEPDIR)/fd.Plo ./$(DEPDIR)/fetch_config.Plo \
-	./$(DEPDIR)/forward.Plo ./$(DEPDIR)/global_defaults.Plo \
-	./$(DEPDIR)/group_cache.Plo ./$(DEPDIR)/half_duplex.Plo \
-	./$(DEPDIR)/hostlist.Plo ./$(DEPDIR)/http.Plo \
-	./$(DEPDIR)/http_con.Plo ./$(DEPDIR)/http_mime.Plo \
-	./$(DEPDIR)/http_router.Plo ./$(DEPDIR)/http_switch.Plo \
-	./$(DEPDIR)/id_util.Plo ./$(DEPDIR)/identity.Plo \
-	./$(DEPDIR)/io_hdr.Plo ./$(DEPDIR)/job_features.Plo \
-	./$(DEPDIR)/job_options.Plo ./$(DEPDIR)/job_record.Plo \
-	./$(DEPDIR)/job_resources.Plo ./$(DEPDIR)/job_state_reason.Plo \
-	./$(DEPDIR)/list.Plo ./$(DEPDIR)/log.Plo \
-	./$(DEPDIR)/msg_type.Plo ./$(DEPDIR)/net.Plo \
-	./$(DEPDIR)/node_conf.Plo ./$(DEPDIR)/node_features.Plo \
-	./$(DEPDIR)/oci_config.Plo ./$(DEPDIR)/openapi.Plo \
-	./$(DEPDIR)/optz.Plo ./$(DEPDIR)/pack.Plo \
-	./$(DEPDIR)/parse_config.Plo ./$(DEPDIR)/parse_time.Plo \
-	./$(DEPDIR)/parse_value.Plo ./$(DEPDIR)/part_record.Plo \
-	./$(DEPDIR)/persist_conn.Plo ./$(DEPDIR)/plugin.Plo \
-	./$(DEPDIR)/plugrack.Plo ./$(DEPDIR)/port_mgr.Plo \
-	./$(DEPDIR)/print_fields.Plo ./$(DEPDIR)/probes.Plo \
-	./$(DEPDIR)/proc_args.Plo ./$(DEPDIR)/read_config.Plo \
-	./$(DEPDIR)/reverse_tree.Plo ./$(DEPDIR)/run_command.Plo \
-	./$(DEPDIR)/run_in_daemon.Plo ./$(DEPDIR)/sack_api.Plo \
-	./$(DEPDIR)/setproctitle.Plo ./$(DEPDIR)/sluid.Plo \
-	./$(DEPDIR)/slurm_errno.Plo ./$(DEPDIR)/slurm_opt.Plo \
-	./$(DEPDIR)/slurm_protocol_api.Plo \
+	./$(DEPDIR)/env.Plo ./$(DEPDIR)/events.Plo \
+	./$(DEPDIR)/extra_constraints.Plo ./$(DEPDIR)/fd.Plo \
+	./$(DEPDIR)/fetch_config.Plo ./$(DEPDIR)/forward.Plo \
+	./$(DEPDIR)/global_defaults.Plo ./$(DEPDIR)/group_cache.Plo \
+	./$(DEPDIR)/half_duplex.Plo ./$(DEPDIR)/hostlist.Plo \
+	./$(DEPDIR)/http.Plo ./$(DEPDIR)/http_con.Plo \
+	./$(DEPDIR)/http_mime.Plo ./$(DEPDIR)/http_router.Plo \
+	./$(DEPDIR)/http_switch.Plo ./$(DEPDIR)/id_util.Plo \
+	./$(DEPDIR)/identity.Plo ./$(DEPDIR)/io_hdr.Plo \
+	./$(DEPDIR)/job_features.Plo ./$(DEPDIR)/job_options.Plo \
+	./$(DEPDIR)/job_record.Plo ./$(DEPDIR)/job_resources.Plo \
+	./$(DEPDIR)/job_state_reason.Plo ./$(DEPDIR)/list.Plo \
+	./$(DEPDIR)/log.Plo ./$(DEPDIR)/msg_type.Plo \
+	./$(DEPDIR)/net.Plo ./$(DEPDIR)/node_conf.Plo \
+	./$(DEPDIR)/node_features.Plo ./$(DEPDIR)/oci_config.Plo \
+	./$(DEPDIR)/openapi.Plo ./$(DEPDIR)/optz.Plo \
+	./$(DEPDIR)/pack.Plo ./$(DEPDIR)/parse_config.Plo \
+	./$(DEPDIR)/parse_time.Plo ./$(DEPDIR)/parse_value.Plo \
+	./$(DEPDIR)/part_record.Plo ./$(DEPDIR)/persist_conn.Plo \
+	./$(DEPDIR)/plugin.Plo ./$(DEPDIR)/plugrack.Plo \
+	./$(DEPDIR)/port_mgr.Plo ./$(DEPDIR)/print_fields.Plo \
+	./$(DEPDIR)/probes.Plo ./$(DEPDIR)/proc_args.Plo \
+	./$(DEPDIR)/read_config.Plo ./$(DEPDIR)/reverse_tree.Plo \
+	./$(DEPDIR)/run_command.Plo ./$(DEPDIR)/run_in_daemon.Plo \
+	./$(DEPDIR)/sack_api.Plo ./$(DEPDIR)/setproctitle.Plo \
+	./$(DEPDIR)/sluid.Plo ./$(DEPDIR)/slurm_errno.Plo \
+	./$(DEPDIR)/slurm_opt.Plo ./$(DEPDIR)/slurm_protocol_api.Plo \
 	./$(DEPDIR)/slurm_protocol_defs.Plo \
 	./$(DEPDIR)/slurm_protocol_pack.Plo \
 	./$(DEPDIR)/slurm_protocol_socket.Plo \
@@ -604,6 +604,8 @@
 	env.h					\
 	extra_constraints.c			\
 	extra_constraints.h			\
+	events.c				\
+	events.h				\
 	fd.c					\
 	fd.h					\
 	fetch_config.c				\
@@ -862,6 +864,7 @@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dynamic_plugin_data.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eio.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/events.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/extra_constraints.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fd.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fetch_config.Plo@am__quote@ # am--include-marker
@@ -1090,6 +1093,7 @@
 	-rm -f ./$(DEPDIR)/dynamic_plugin_data.Plo
 	-rm -f ./$(DEPDIR)/eio.Plo
 	-rm -f ./$(DEPDIR)/env.Plo
+	-rm -f ./$(DEPDIR)/events.Plo
 	-rm -f ./$(DEPDIR)/extra_constraints.Plo
 	-rm -f ./$(DEPDIR)/fd.Plo
 	-rm -f ./$(DEPDIR)/fetch_config.Plo
@@ -1238,6 +1242,7 @@
 	-rm -f ./$(DEPDIR)/dynamic_plugin_data.Plo
 	-rm -f ./$(DEPDIR)/eio.Plo
 	-rm -f ./$(DEPDIR)/env.Plo
+	-rm -f ./$(DEPDIR)/events.Plo
 	-rm -f ./$(DEPDIR)/extra_constraints.Plo
 	-rm -f ./$(DEPDIR)/fd.Plo
 	-rm -f ./$(DEPDIR)/fetch_config.Plo
diff --git a/src/conmgr/events.c b/src/common/events.c
similarity index 86%
rename from src/conmgr/events.c
rename to src/common/events.c
index b094fcf..ec1dd18 100644
--- a/src/conmgr/events.c
+++ b/src/common/events.c
@@ -35,17 +35,16 @@
 
 #include <pthread.h>
 
+#include "src/common/events.h"
 #include "src/common/macros.h"
 #include "src/common/read_config.h"
 #include "src/common/timers.h"
 #include "src/common/xassert.h"
 #include <time.h>
 
-#include "src/conmgr/events.h"
-
 static void _wait_pending(event_signal_t *event, const char *caller)
 {
-	log_flag(CONMGR, "%s->%s: [EVENT:%s] wait skipped due to %d pending reliable signals",
+	log_flag(THREAD, "%s->%s: [EVENT:%s] wait skipped due to %d pending reliable signals",
 		 caller, __func__, event->name, event->pending);
 
 	xassert(!event->waiting);
@@ -60,10 +59,10 @@
 {
 	DEF_TIMERS;
 
-	if (slurm_conf.debug_flags & DEBUG_FLAG_CONMGR) {
+	if (slurm_conf.debug_flags & DEBUG_FLAG_THREAD) {
 		START_TIMER;
 
-		log_flag(CONMGR, "%s->%s: [EVENT:%s] BEGIN wait with %d other waiters",
+		log_flag(THREAD, "%s->%s: [EVENT:%s] BEGIN wait with %d other waiters",
 			 caller, __func__, event->name, event->waiting);
 	}
 
@@ -81,11 +80,11 @@
 	xassert(event->waiting >= 0);
 	xassert(!event->pending);
 
-	if (slurm_conf.debug_flags & DEBUG_FLAG_CONMGR) {
+	if (slurm_conf.debug_flags & DEBUG_FLAG_THREAD) {
 		/* we want the time but not to warn about a time limit */
-		END_TIMER3(NULL, 0);
+		END_TIMER;
 
-		log_flag(CONMGR, "%s->%s: [EVENT:%s] END waited after %s with %d other pending waiters",
+		log_flag(THREAD, "%s->%s: [EVENT:%s] END waited after %s with %d other pending waiters",
 			 caller, __func__, event->name, TIMER_STR(),
 			 event->waiting);
 	}
@@ -103,7 +102,7 @@
 static void _broadcast(event_signal_t *event, const char *caller)
 {
 	if (!event->waiting) {
-		log_flag(CONMGR, "%s->%s: [EVENT:%s] broadcast skipped due to 0 waiters with %d pending signals",
+		log_flag(THREAD, "%s->%s: [EVENT:%s] broadcast skipped due to 0 waiters with %d pending signals",
 			 caller, __func__, event->name, event->pending);
 		return;
 	}
@@ -111,7 +110,7 @@
 	/* can't have pending signals when there are waiters */
 	xassert(!event->pending);
 
-	log_flag(CONMGR, "%s->%s: [EVENT:%s] broadcasting to all %d waiters",
+	log_flag(THREAD, "%s->%s: [EVENT:%s] broadcasting to all %d waiters",
 		 caller, __func__, event->name, event->pending);
 
 	slurm_cond_broadcast(&event->cond);
@@ -122,7 +121,7 @@
 	/* can't have pending signals when there are waiters */
 	xassert(!event->pending);
 
-	log_flag(CONMGR, "%s->%s: [EVENT:%s] sending signal to 1/%d waiters",
+	log_flag(THREAD, "%s->%s: [EVENT:%s] sending signal to 1/%d waiters",
 		 caller, __func__, event->name, event->waiting);
 
 	slurm_cond_signal(&event->cond);
@@ -133,10 +132,10 @@
 	xassert(event->pending >= 0);
 
 	if (event->pending) {
-		log_flag(CONMGR, "%s->%s: [EVENT:%s] skipping signal to 0 waiters with %d signals pending",
+		log_flag(THREAD, "%s->%s: [EVENT:%s] skipping signal to 0 waiters with %d signals pending",
 			 caller, __func__, event->name, event->pending);
 	} else {
-		log_flag(CONMGR, "%s->%s: [EVENT:%s] enqueuing signal to 0 waiters with 0 signals pending",
+		log_flag(THREAD, "%s->%s: [EVENT:%s] enqueuing signal to 0 waiters with 0 signals pending",
 			 caller, __func__, event->name);
 		event->pending++;
 	}
diff --git a/src/conmgr/events.h b/src/common/events.h
similarity index 92%
rename from src/conmgr/events.h
rename to src/common/events.h
index 07a82d2..33e3622 100644
--- a/src/conmgr/events.h
+++ b/src/common/events.h
@@ -37,8 +37,8 @@
  * Named event based signaling and waiting
  */
 
-#ifndef _CONMGR_EVENTS_H
-#define _CONMGR_EVENTS_H
+#ifndef _SLURM_EVENTS_H
+#define _SLURM_EVENTS_H
 
 #include <pthread.h>
 #include <stdbool.h>
@@ -65,9 +65,9 @@
 	}
 
 #define EVENT_FREE_MEMBERS(event) \
-do { \
-	slurm_cond_destroy(&((event)->cond)); \
-} while (false)
+	do { \
+		slurm_cond_destroy(&((event)->cond)); \
+	} while (false)
 
 /*
  * Wait (aka block) for a signal for a given event
@@ -92,7 +92,7 @@
  * IN mutex - the mutex controlling event->cond
  */
 #define EVENT_WAIT(event, mutex) \
-	event_wait_now(event, mutex, (struct timespec) {0}, __func__)
+	event_wait_now(event, mutex, (struct timespec) { 0 }, __func__)
 
 /*
  * Wait (aka block) for a signal for a given event
@@ -122,13 +122,11 @@
  * Send signal to one waiter even if EVENT_WAIT() called later but drop signal
  * if there is already another reliable signal pending a waiter.
  */
-#define EVENT_SIGNAL(event) \
-	event_signal_now(false, event, __func__)
+#define EVENT_SIGNAL(event) event_signal_now(false, event, __func__)
 /*
  * Send signal to all currently waiting threads or drop signal if there are no
  * currently waiting threads.
  */
-#define EVENT_BROADCAST(event) \
-	event_signal_now(true, event, __func__)
+#define EVENT_BROADCAST(event) event_signal_now(true, event, __func__)
 
-#endif /* _CONMGR_EVENTS_H */
+#endif
diff --git a/src/common/macros.h b/src/common/macros.h
index 45598c0..b61d535 100644
--- a/src/common/macros.h
+++ b/src/common/macros.h
@@ -286,12 +286,12 @@
 /* Get __typeof__(s->f) */
 #define STRUCT_FIELD_TYPEOF(s, f) __typeof__(((s *) (NULL))->f)
 
-#define SWAP(x, y)		\
-do {				\
-	__typeof__(x) b = x;	\
-	x = y;			\
-	y = b;			\
-} while (0)
+#define SWAP(x, y) \
+	do { \
+		__typeof__(x) b = (x); \
+		(x) = (y); \
+		(y) = b; \
+	} while (0)
 
 /* macro to force stringification */
 #define XSTRINGIFY(s) XSTRINGIFY2(s)
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 35fc4c5..d38076e 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -2861,6 +2861,11 @@
 	/* Malloc space ahead of time to avoid realloc inside of xstrcat. */
 	char *state_str = xmalloc(100);
 
+	if (state == NO_VAL) {
+		xstrcat(state_str, "UNINITIALISED");
+		return state_str;
+	}
+
 	/* Process JOB_STATE_BASE */
 	switch (state & JOB_STATE_BASE) {
 	case JOB_PENDING:
diff --git a/src/common/slurm_time.c b/src/common/slurm_time.c
index 380c957..a199a7f 100644
--- a/src/common/slurm_time.c
+++ b/src/common/slurm_time.c
@@ -155,13 +155,17 @@
 
 	if (!ts.tv_nsec && !ts.tv_sec) {
 		return snprintf(buffer, buffer_len, "%s",
-				(abs_time ? "now" : "None"));
+				(abs_time ? "NEVER" : "now"));
 	}
 
 	ts = timespec_normalize(ts);
 
 	if (abs_time) {
-		ts = timespec_normalize(timespec_rem(ts, timespec_now()));
+		const timespec_diff_ns_t tdiff =
+			timespec_diff_ns(timespec_now(), ts);
+
+		ts = tdiff.diff;
+		negative = tdiff.after;
 
 		if (!ts.tv_nsec && !ts.tv_sec) {
 			return snprintf(buffer, buffer_len, "now");
@@ -169,7 +173,7 @@
 	}
 
 	/* Force positive time */
-	if (ts.tv_sec < 0) {
+	if ((ts.tv_sec < 0) || (ts.tv_nsec < 0)) {
 		negative = true;
 
 		ts.tv_sec *= -1;
@@ -242,25 +246,39 @@
 	return wrote;
 }
 
-extern timespec_t timespec_normalize(timespec_t ts)
+/* Normalize nsec to less than a second */
+static timespec_t _normalize(timespec_t ts)
 {
-	/* Force direction of time to be uniform */
-	if ((ts.tv_nsec < 0) && (ts.tv_sec > 0)) {
-		ts.tv_sec++;
-		ts.tv_nsec = NSEC_IN_SEC + ts.tv_nsec;
-	} else if ((ts.tv_nsec > 0) && (ts.tv_sec < 0)) {
-		ts.tv_sec--;
-		ts.tv_nsec = NSEC_IN_SEC - ts.tv_nsec;
-	}
-
 	return (timespec_t) {
 		.tv_sec = ts.tv_sec + (ts.tv_nsec / NSEC_IN_SEC),
 		.tv_nsec = (ts.tv_nsec % NSEC_IN_SEC),
 	};
 }
 
-extern timespec_t timespec_add(const timespec_t x, const timespec_t y)
+/* Force direction of time to be uniform */
+static timespec_t _uniform(timespec_t ts)
 {
+	if ((ts.tv_nsec < 0) && (ts.tv_sec > 0)) {
+		ts.tv_sec--;
+		ts.tv_nsec = (NSEC_IN_SEC + ts.tv_nsec);
+	} else if ((ts.tv_nsec > 0) && (ts.tv_sec < 0)) {
+		ts.tv_sec++;
+		ts.tv_nsec = (ts.tv_nsec - NSEC_IN_SEC);
+	}
+
+	return ts;
+}
+
+extern timespec_t timespec_normalize(timespec_t ts)
+{
+	return _normalize(_uniform(_normalize(ts)));
+}
+
+extern timespec_t timespec_add(timespec_t x, timespec_t y)
+{
+	x = timespec_normalize(x);
+	y = timespec_normalize(y);
+
 	/* Use 64bit accumulators to avoid overflow */
 	return timespec_normalize((timespec_t) {
 		.tv_sec = (((uint64_t) x.tv_sec) + ((uint64_t) y.tv_sec)),
@@ -268,25 +286,14 @@
 	});
 }
 
-extern timespec_t timespec_rem(const timespec_t x, const timespec_t y)
+extern timespec_t timespec_rem(timespec_t x, timespec_t y)
 {
-	/* Use 64bit accumulators to avoid underflow */
-	int64_t s = (((uint64_t) x.tv_sec) - ((uint64_t) y.tv_sec));
-	int64_t ns = (((uint64_t) x.tv_nsec) - ((uint64_t) y.tv_nsec));
-
-	/* reject underflow of time */
-	if (s <= 0)
-		return (timespec_t) {0};
-
-	/* force ns to be positive */
-	if (ns < 0) {
-		s--;
-		ns = NSEC_IN_SEC - ns;
-	}
+	x = timespec_normalize(x);
+	y = timespec_normalize(y);
 
 	return timespec_normalize((timespec_t) {
-		.tv_sec = s,
-		.tv_nsec = ns,
+		.tv_sec = (((uint64_t) x.tv_sec) - ((uint64_t) y.tv_sec)),
+		.tv_nsec = (((uint64_t) x.tv_nsec) - ((uint64_t) y.tv_nsec)),
 	});
 }
 
@@ -309,31 +316,20 @@
 extern timespec_diff_ns_t timespec_diff_ns(const timespec_t x,
 					   const timespec_t y)
 {
-	/* Use 64bit accumulators to catch underflows */
-	int64_t s = (((int64_t) x.tv_sec) - ((int64_t) y.tv_sec));
-	int64_t ns = (((int64_t) x.tv_nsec) - ((int64_t) y.tv_nsec));
+	timespec_t ts = timespec_rem(x, y);
 
-	/* Adjust positive nanoseconds if seconds is negative */
-	if ((ns > 0) && (s < 0)) {
-		s += 1;
-		ns -= NSEC_IN_SEC;
-	}
-
-	if (s < 0)
+	if ((ts.tv_sec < 0) || (ts.tv_nsec < 0))
 		return (timespec_diff_ns_t) {
 			.after = false,
-			.diff = {
-				.tv_sec = (-1 * s),
-				.tv_nsec = (-1 * ns),
-			},
+			.diff = ((timespec_t) {
+				.tv_sec = (-1 * ts.tv_sec),
+				.tv_nsec = (-1 * ts.tv_nsec),
+			}),
 		};
 	else
 		return (timespec_diff_ns_t) {
 			.after = true,
-			.diff = {
-				.tv_sec = s,
-				.tv_nsec = ns,
-			},
+			.diff = ts,
 		};
 }
 
diff --git a/src/common/slurm_time.h b/src/common/slurm_time.h
index aa6ad5d..abe1d29 100644
--- a/src/common/slurm_time.h
+++ b/src/common/slurm_time.h
@@ -122,10 +122,10 @@
 extern timespec_t timespec_normalize(timespec_t ts);
 
 /* Add timestamp X to timestamp Y */
-extern timespec_t timespec_add(const timespec_t x, const timespec_t y);
+extern timespec_t timespec_add(timespec_t x, timespec_t y);
 
 /* Subtract timestamp Y from timestamp X */
-extern timespec_t timespec_rem(const timespec_t x, const timespec_t y);
+extern timespec_t timespec_rem(timespec_t x, timespec_t y);
 
 /* Is timestamp X after timestamp Y */
 extern bool timespec_is_after(const timespec_t x, const timespec_t y);
diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c
index b9c5274..b0c5bf9 100644
--- a/src/common/stepd_api.c
+++ b/src/common/stepd_api.c
@@ -1443,25 +1443,14 @@
 	return SLURM_ERROR;
 }
 
-/*
- * Get the memory limits of the step
- * Returns uid of the running step if successful.  On error returns -1.
- */
-extern int stepd_get_mem_limits(int fd, uint16_t protocol_version,
-				slurmstepd_mem_info_t *stepd_mem_info)
+extern int stepd_get_mem_limit(int fd, uint16_t protocol_version,
+			       uint64_t *job_mem_limit)
 {
 	int req = REQUEST_STEP_MEM_LIMITS;
 
-	xassert(stepd_mem_info);
-	memset(stepd_mem_info, 0, sizeof(slurmstepd_mem_info_t));
+	safe_write(fd, &req, sizeof(int));
 
-	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
-		safe_write(fd, &req, sizeof(int));
-
-		safe_read(fd, &stepd_mem_info->job_mem_limit, sizeof(uint32_t));
-		safe_read(fd, &stepd_mem_info->step_mem_limit,
-			  sizeof(uint32_t));
-	}
+	safe_read(fd, &job_mem_limit, sizeof(uint64_t));
 
 	return SLURM_SUCCESS;
 rwfail:
diff --git a/src/common/stepd_api.h b/src/common/stepd_api.h
index 7fb8901..0ae997a 100644
--- a/src/common/stepd_api.h
+++ b/src/common/stepd_api.h
@@ -316,10 +316,9 @@
 
 /*
  * Get the memory limits of the step
- * Returns uid of the running step if successful.  On error returns -1.
  */
-extern int stepd_get_mem_limits(int fd, uint16_t protocol_version,
-				slurmstepd_mem_info_t *stepd_mem_info);
+extern int stepd_get_mem_limit(int fd, uint16_t protocol_version,
+			       uint64_t *job_mem_limit);
 
 /*
  * Get the uid of the step
diff --git a/src/common/threadpool.c b/src/common/threadpool.c
index d66f1a0..c02203b 100644
--- a/src/common/threadpool.c
+++ b/src/common/threadpool.c
@@ -141,7 +141,13 @@
 static void _set_thread_name(const char *name)
 {
 #if HAVE_SYS_PRCTL_H
-	xassert(strlen(name) < PRCTL_BUF_BYTES);
+
+#ifndef NDEBUG
+	if (strlen(name) >= PRCTL_BUF_BYTES)
+		warning("Thread name truncated[%zu/%zu]: %s",
+			(uint64_t) strlen(name), (uint64_t) PRCTL_BUF_BYTES,
+			name);
+#endif
 
 	if (prctl(PR_SET_NAME, name, NULL, NULL, NULL))
 		error("%s: cannot set process name to %s %m", __func__, name);
@@ -150,13 +156,11 @@
 
 static void _thread_free(thread_t *thread)
 {
-#ifdef MEMORY_LEAK_DEBUG
 	xassert(thread);
 	xassert(thread->magic == THREAD_MAGIC);
 
 	thread->magic = ~THREAD_MAGIC;
 	xfree(thread);
-#endif
 }
 
 static void *_thread(void *arg)
diff --git a/src/conmgr/Makefile.am b/src/conmgr/Makefile.am
index f385152..21bf61d 100644
--- a/src/conmgr/Makefile.am
+++ b/src/conmgr/Makefile.am
@@ -13,8 +13,6 @@
 	conmgr.h \
 	delayed.c \
 	delayed.h \
-	events.c \
-	events.h \
 	io.c \
 	mgr.h \
 	poll.c \
diff --git a/src/conmgr/Makefile.in b/src/conmgr/Makefile.in
index 54f88fc..32c0bb2 100644
--- a/src/conmgr/Makefile.in
+++ b/src/conmgr/Makefile.in
@@ -163,8 +163,8 @@
 LTLIBRARIES = $(noinst_LTLIBRARIES)
 libconmgr_la_LIBADD =
 @HAVE_EPOLL_TRUE@am__objects_1 = epoll.lo
-am_libconmgr_la_OBJECTS = con.lo conmgr.lo delayed.lo events.lo io.lo \
-	poll.lo polling.lo rpc.lo signals.lo tls.lo tls_fingerprint.lo \
+am_libconmgr_la_OBJECTS = con.lo conmgr.lo delayed.lo io.lo poll.lo \
+	polling.lo rpc.lo signals.lo tls.lo tls_fingerprint.lo \
 	watch.lo work.lo workers.lo $(am__objects_1)
 libconmgr_la_OBJECTS = $(am_libconmgr_la_OBJECTS)
 AM_V_lt = $(am__v_lt_@AM_V@)
@@ -194,7 +194,7 @@
 am__maybe_remake_depfiles = depfiles
 am__depfiles_remade = ./$(DEPDIR)/con.Plo ./$(DEPDIR)/conmgr.Plo \
 	./$(DEPDIR)/delayed.Plo ./$(DEPDIR)/epoll.Plo \
-	./$(DEPDIR)/events.Plo ./$(DEPDIR)/io.Plo ./$(DEPDIR)/poll.Plo \
+	./$(DEPDIR)/io.Plo ./$(DEPDIR)/poll.Plo \
 	./$(DEPDIR)/polling.Plo ./$(DEPDIR)/rpc.Plo \
 	./$(DEPDIR)/signals.Plo ./$(DEPDIR)/tls.Plo \
 	./$(DEPDIR)/tls_fingerprint.Plo ./$(DEPDIR)/watch.Plo \
@@ -505,9 +505,9 @@
 AM_CPPFLAGS = -I$(top_srcdir)
 noinst_LTLIBRARIES = libconmgr.la
 libconmgr_la_SOURCES = con.c conmgr.c conmgr.h delayed.c delayed.h \
-	events.c events.h io.c mgr.h poll.c polling.c polling.h rpc.c \
-	signals.c signals.h tls.c tls.h tls_fingerprint.c \
-	tls_fingerprint.h watch.c work.c workers.c $(am__append_1)
+	io.c mgr.h poll.c polling.c polling.h rpc.c signals.c \
+	signals.h tls.c tls.h tls_fingerprint.c tls_fingerprint.h \
+	watch.c work.c workers.c $(am__append_1)
 libconmgr_la_LDFLAGS = $(LIB_LDFLAGS) -module --export-dynamic
 
 # This was made so we could export all symbols from libconmgr
@@ -573,7 +573,6 @@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conmgr.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/delayed.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/epoll.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/events.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/io.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poll.Plo@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/polling.Plo@am__quote@ # am--include-marker
@@ -713,7 +712,6 @@
 	-rm -f ./$(DEPDIR)/conmgr.Plo
 	-rm -f ./$(DEPDIR)/delayed.Plo
 	-rm -f ./$(DEPDIR)/epoll.Plo
-	-rm -f ./$(DEPDIR)/events.Plo
 	-rm -f ./$(DEPDIR)/io.Plo
 	-rm -f ./$(DEPDIR)/poll.Plo
 	-rm -f ./$(DEPDIR)/polling.Plo
@@ -773,7 +771,6 @@
 	-rm -f ./$(DEPDIR)/conmgr.Plo
 	-rm -f ./$(DEPDIR)/delayed.Plo
 	-rm -f ./$(DEPDIR)/epoll.Plo
-	-rm -f ./$(DEPDIR)/events.Plo
 	-rm -f ./$(DEPDIR)/io.Plo
 	-rm -f ./$(DEPDIR)/poll.Plo
 	-rm -f ./$(DEPDIR)/polling.Plo
diff --git a/src/conmgr/epoll.c b/src/conmgr/epoll.c
index bee7030..56c0f27 100644
--- a/src/conmgr/epoll.c
+++ b/src/conmgr/epoll.c
@@ -42,6 +42,7 @@
 #include "slurm/slurm.h"
 #include "slurm/slurm_errno.h"
 
+#include "src/common/events.h"
 #include "src/common/fd.h"
 #include "src/common/log.h"
 #include "src/common/macros.h"
@@ -52,7 +53,6 @@
 #include "src/common/xstring.h"
 
 #include "src/conmgr/polling.h"
-#include "src/conmgr/events.h"
 
 /*
  * Size event count for 1 input and 1 output per connection and
@@ -607,7 +607,7 @@
 	safe_write(fd, buf, 1);
 
 	if (slurm_conf.debug_flags & DEBUG_FLAG_CONMGR) {
-		END_TIMER3(NULL, 0);
+		END_TIMER;
 		log_flag(CONMGR, "%s->%s: [EPOLL] interrupt byte sent in %s",
 			 caller, __func__, TIMER_STR());
 	}
diff --git a/src/conmgr/mgr.h b/src/conmgr/mgr.h
index b64bb82..02b59da 100644
--- a/src/conmgr/mgr.h
+++ b/src/conmgr/mgr.h
@@ -50,11 +50,11 @@
 
 #include "slurm/slurm.h"
 
+#include "src/common/events.h"
 #include "src/common/pack.h"
 #include "src/common/probes.h"
 
 #include "src/conmgr/conmgr.h"
-#include "src/conmgr/events.h"
 #include "src/conmgr/polling.h"
 
 /* Default buffer to 1 page */
diff --git a/src/conmgr/poll.c b/src/conmgr/poll.c
index a6658a1..c1223ce 100644
--- a/src/conmgr/poll.c
+++ b/src/conmgr/poll.c
@@ -43,6 +43,7 @@
 #include "slurm/slurm.h"
 #include "slurm/slurm_errno.h"
 
+#include "src/common/events.h"
 #include "src/common/fd.h"
 #include "src/common/log.h"
 #include "src/common/macros.h"
@@ -52,7 +53,6 @@
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
 
-#include "src/conmgr/events.h"
 #include "src/conmgr/polling.h"
 
 /*
@@ -643,7 +643,7 @@
 	safe_write(fd, buf, 1);
 
 	if (slurm_conf.debug_flags & DEBUG_FLAG_CONMGR) {
-		END_TIMER3(NULL, 0);
+		END_TIMER;
 		log_flag(CONMGR, "%s->%s: [POLL] interrupt byte sent in %s",
 			 caller, __func__, TIMER_STR());
 	}
diff --git a/src/conmgr/watch.c b/src/conmgr/watch.c
index cc24b4a..0da6919 100644
--- a/src/conmgr/watch.c
+++ b/src/conmgr/watch.c
@@ -48,6 +48,7 @@
 #include "slurm/slurm.h"
 #include "slurm/slurm_errno.h"
 
+#include "src/common/events.h"
 #include "src/common/fd.h"
 #include "src/common/list.h"
 #include "src/common/macros.h"
@@ -59,7 +60,6 @@
 
 #include "src/conmgr/conmgr.h"
 #include "src/conmgr/delayed.h"
-#include "src/conmgr/events.h"
 #include "src/conmgr/mgr.h"
 #include "src/conmgr/polling.h"
 #include "src/conmgr/signals.h"
diff --git a/src/conmgr/work.c b/src/conmgr/work.c
index 640569b..42e5d57 100644
--- a/src/conmgr/work.c
+++ b/src/conmgr/work.c
@@ -33,6 +33,7 @@
  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 \*****************************************************************************/
 
+#include "src/common/events.h"
 #include "src/common/list.h"
 #include "src/common/macros.h"
 #include "src/common/proc_args.h"
@@ -42,7 +43,6 @@
 
 #include "src/conmgr/conmgr.h"
 #include "src/conmgr/delayed.h"
-#include "src/conmgr/events.h"
 #include "src/conmgr/mgr.h"
 #include "src/conmgr/signals.h"
 
diff --git a/src/conmgr/workers.c b/src/conmgr/workers.c
index c7b1933..a493658 100644
--- a/src/conmgr/workers.c
+++ b/src/conmgr/workers.c
@@ -35,6 +35,7 @@
 
 #include "slurm/slurm_errno.h"
 
+#include "src/common/events.h"
 #include "src/common/macros.h"
 #include "src/common/probes.h"
 #include "src/common/read_config.h"
@@ -45,7 +46,6 @@
 #include "src/common/xsched.h"
 
 #include "src/conmgr/conmgr.h"
-#include "src/conmgr/events.h"
 #include "src/conmgr/mgr.h"
 
 /* Limit automatically set default thread count */
diff --git a/src/plugins/auth/slurm/sack.c b/src/plugins/auth/slurm/sack.c
index 44db36d..8f020f1 100644
--- a/src/plugins/auth/slurm/sack.c
+++ b/src/plugins/auth/slurm/sack.c
@@ -365,9 +365,12 @@
 
 extern int auth_p_get_reconfig_fd(void)
 {
-	/* Prepare for reconfigure */
-	setenvf(NULL, SACK_RECONFIG_ENV, "%d", sack_fd);
-	fd_set_noclose_on_exec(sack_fd);
+	if (sack_fd >= 0) {
+		/* Prepare for reconfigure */
+		setenvf(NULL, SACK_RECONFIG_ENV, "%d", sack_fd);
+		fd_set_noclose_on_exec(sack_fd);
+	}
 
+	/* sack_fd init'd to -1 and won't be added for skip close() later. */
 	return sack_fd;
 }
diff --git a/src/plugins/namespace/linux/namespace_linux.c b/src/plugins/namespace/linux/namespace_linux.c
index 1eb1726..616ca61 100644
--- a/src/plugins/namespace/linux/namespace_linux.c
+++ b/src/plugins/namespace/linux/namespace_linux.c
@@ -80,6 +80,7 @@
 
 static ns_conf_t *ns_conf = NULL;
 static bool plugin_disabled = false;
+static pid_t ns_pid = -1;
 
 /* NS_L_NS must be last */
 enum ns_l_types {
@@ -181,7 +182,7 @@
 		return _delete_ns(job_id);
 	}
 
-	close(fd);
+	fd_close(&fd);
 
 	return SLURM_SUCCESS;
 }
@@ -212,10 +213,7 @@
 #ifdef MEMORY_LEAK_DEBUG
 	for (int i = 0; i < NS_L_END; i++) {
 		xfree(ns_l_enabled[i].path);
-		if (ns_l_enabled[i].fd >= 0) {
-			close(ns_l_enabled[i].fd);
-			ns_l_enabled[i].fd = -1;
-		}
+		fd_close(&ns_l_enabled[i].fd);
 	}
 	free_ns_conf();
 #endif
@@ -551,7 +549,7 @@
 
 static int _clonens_user_setup(stepd_step_rec_t *step, pid_t pid)
 {
-	int fd = 0, rc = SLURM_SUCCESS;
+	int fd = -1, rc = SLURM_SUCCESS;
 	char *tmpstr = NULL;
 
 	if (!ns_l_enabled[NS_L_USER].enabled)
@@ -596,8 +594,7 @@
 		rc = SLURM_ERROR;
 		goto end_it;
 	}
-	if (fd >= 0)
-		close(fd);
+	fd_close(&fd);
 	xfree(tmpstr);
 
 	xstrfmtcat(tmpstr, "/proc/%d/gid_map", pid);
@@ -614,8 +611,7 @@
 	}
 
 end_it:
-	if (fd >= 0)
-		close(fd);
+	fd_close(&fd);
 	xfree(tmpstr);
 	return rc;
 }
@@ -630,7 +626,6 @@
 	unsigned long tls = 0;
 	sem_t *sem1 = NULL;
 	sem_t *sem2 = NULL;
-	pid_t cpid;
 
 	_create_paths(step->step_id.job_id, &job_mount, &ns_base, &src_bind);
 
@@ -675,7 +670,7 @@
 			rc = -1;
 			goto exit2;
 		}
-		close(fd);
+		fd_close(&fd);
 	}
 
 	/* Create location for bind mounts to go */
@@ -744,14 +739,14 @@
 		error("%s: sem_init: %m", __func__);
 		goto exit1;
 	}
-	cpid = sys_clone(ns_conf->clonensflags|SIGCHLD, &parent_tid,
-			 &child_tid, tls);
+	ns_pid = sys_clone(ns_conf->clonensflags | SIGCHLD, &parent_tid,
+			   &child_tid, tls);
 
-	if (cpid == -1) {
+	if (ns_pid == -1) {
 		error("%s: sys_clone failed: %m", __func__);
 		rc = -1;
 		goto exit1;
-	} else if (cpid == 0) {
+	} else if (ns_pid == 0) {
 		_create_ns_child(step, src_bind, job_mount, sem1, sem2);
 	} else {
 		char *proc_path = NULL;
@@ -763,7 +758,7 @@
 		for (int i = 0; i < NS_L_END; i++) {
 			if (!ns_l_enabled[i].enabled)
 				continue;
-			xstrfmtcat(proc_path, "/proc/%u/ns/%s", cpid,
+			xstrfmtcat(proc_path, "/proc/%u/ns/%s", ns_pid,
 				   ns_l_enabled[i].proc_name);
 			rc = mount(proc_path, ns_l_enabled[i].path, NULL,
 				   MS_BIND, NULL);
@@ -780,7 +775,7 @@
 		}
 
 		/* setup users before setting up the rest of the container */
-		if ((rc = _clonens_user_setup(step, cpid))) {
+		if ((rc = _clonens_user_setup(step, ns_pid))) {
 			error("%s: Unable to prepare user namespace.",
 			      __func__);
 			/* error needs to fall though here */
@@ -799,9 +794,9 @@
 			goto exit1;
 		}
 
-		if (proctrack_g_add(step, cpid) != SLURM_SUCCESS) {
+		if (proctrack_g_add(step, ns_pid) != SLURM_SUCCESS) {
 			error("%s: Job %u can't add pid %d to proctrack plugin in the extern_step.",
-			      __func__, step->step_id.job_id, cpid);
+			      __func__, step->step_id.job_id, ns_pid);
 			rc = SLURM_ERROR;
 			goto exit1;
 		}
@@ -952,8 +947,7 @@
 		if (!ns_l_enabled[i].enabled)
 			continue;
 		rc = setns(ns_l_enabled[i].fd, 0);
-		close(ns_l_enabled[i].fd);
-		ns_l_enabled[i].fd = -1;
+		fd_close(&ns_l_enabled[i].fd);
 		if (rc) {
 			error("%s: setns failed for %s: %m",
 			      __func__, ns_l_enabled[i].path);
@@ -1063,6 +1057,17 @@
 	if (plugin_disabled)
 		return SLURM_SUCCESS;
 
+	if (ns_pid) {
+		int wstatus;
+		/*
+		 * The namespace process may have been signaled already, but
+		 * kill it to be sure.
+		 */
+		kill(ns_pid, SIGKILL);
+		waitpid(ns_pid, &wstatus, 0);
+		ns_pid = -1;
+	}
+
 	return _delete_ns(step_id->job_id);
 }
 
@@ -1167,7 +1172,6 @@
 		rc = SLURM_SUCCESS;
 	}
 end:
-	if (fd >= 0)
-		close(fd);
+	fd_close(&fd);
 	return rc;
 }
diff --git a/src/plugins/select/cons_tres/job_test.c b/src/plugins/select/cons_tres/job_test.c
index d6a1739..b78a752 100644
--- a/src/plugins/select/cons_tres/job_test.c
+++ b/src/plugins/select/cons_tres/job_test.c
@@ -541,6 +541,8 @@
 		 * Do not allocate more jobs to nodes with completing jobs,
 		 * backfill scheduler independently handles completing nodes
 		 */
+		log_flag(SELECT_TYPE, "Node %s is in COMPLETING state, skip trying to schedule %pJ on this node.",
+			 node_ptr->name, job_ptr);
 		return NULL;
 	}
 
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index cae5aca..298d47a 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -3419,9 +3419,6 @@
 					   sizeof(job_record_t *));
 		job_array_hash_t = xcalloc(hash_table_size,
 					   sizeof(job_record_t *));
-		if (xstrcasestr(slurm_conf.sched_params,
-				"enable_job_state_cache"))
-			setup_job_state_hash(hash_table_size);
 	} else if (hash_table_size < (slurm_conf.max_job_cnt / 2)) {
 		/* If the MaxJobCount grows by too much, the hash table will
 		 * be ineffective without rebuilding. We don't presently bother
@@ -7969,6 +7966,7 @@
 
 	/* Add a temporary job_ptr for node_features_g_job_valid */
 	job_ptr = xmalloc(sizeof(job_record_t));
+	job_ptr->magic = JOB_MAGIC;
 	job_ptr->details = xmalloc(sizeof(job_details_t));
 	/* Point, don't dup, so don't free */
 	job_ptr->details->features = job_desc->features;
@@ -8072,6 +8070,7 @@
 	FREE_NULL_LIST(job_ptr->details->feature_list);
 	FREE_NULL_LIST(job_ptr->details->prefer_list);
 	xfree(job_ptr->details);
+	job_ptr->magic = ~JOB_MAGIC;
 	xfree(job_ptr);
 
 	return rc;
diff --git a/src/slurmctld/job_state.c b/src/slurmctld/job_state.c
index 3520495..242ec52 100644
--- a/src/slurmctld/job_state.c
+++ b/src/slurmctld/job_state.c
@@ -33,97 +33,11 @@
  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 \*****************************************************************************/
 
-#define _GNU_SOURCE
-#include <pthread.h>
-
 #include "src/common/macros.h"
-#include "src/common/xahash.h"
-#include "src/common/xstring.h"
 
 #include "src/slurmctld/locks.h"
 #include "src/slurmctld/slurmctld.h"
 
-/* Macro to only include the code inside of ONLY_DEBUG if !NDEBUG */
-#ifndef NDEBUG
-#define ONLY_DEBUG(...) __VA_ARGS__
-#else
-#define ONLY_DEBUG(...)
-#endif
-
-/*
- * Favor writer lock acquisition to avoid delaying the scheduling thread
- * when under heavy client load. Clients can be safely delayed, job launch
- * is most important.
- */
-#ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
-#define CACHE_LOCK_INIT PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
-#else
-#define CACHE_LOCK_INIT PTHREAD_RWLOCK_INITIALIZER
-#endif
-
-#define JOB_STATE_MIMIC_RECORD(js)                                             \
-	&(const job_record_t)                                                  \
-	{                                                                      \
-		.magic = JOB_MAGIC, .job_id = js->job_id,                      \
-		.array_job_id = js->array_job_id,                              \
-		.array_task_id = js->array_task_id,                            \
-		.het_job_id = js->het_job_id, .job_state = js->job_state,      \
-		.array_recs =                                                  \
-			(!js->task_id_bitmap ?                                 \
-				 NULL :                                        \
-				 &(job_array_struct_t){                        \
-					 .task_cnt =                           \
-						 bit_size(js->task_id_bitmap), \
-					 .task_id_bitmap = js->task_id_bitmap, \
-				 }),                                           \
-	}
-
-#define ARRAY_JOB_STATE_MIMIC_RECORD(ajs)                  \
-	&(const job_record_t)                              \
-	{                                                  \
-		.magic = JOB_MAGIC, .job_id = ajs->job_id, \
-		.array_task_id = NO_VAL,                   \
-	}
-
-#define ARRAY_TASK_STATE_KEY_BYTES sizeof(array_task_state_cached_t)
-#define ARRAY_TASK_STATE_KEY_JOB_ID(find_array_job_id, find_array_task_id) \
-	&(array_task_state_cached_t)                                       \
-	{                                                                  \
-		ONLY_DEBUG(.magic = MAGIC_ARRAY_TASK_STATE_CACHED, )       \
-			.job_id = NO_VAL, /* not used in search */         \
-			.array_job_id = find_array_job_id,                 \
-		       .array_task_id = find_array_task_id,                \
-	}
-#define ARRAY_TASK_STATE_KEY_JOB_PTR(job_ptr)                        \
-	&(array_task_state_cached_t)                                 \
-	{                                                            \
-		ONLY_DEBUG(.magic = MAGIC_ARRAY_TASK_STATE_CACHED, ) \
-			.job_id = job_ptr->job_id,                   \
-		       .array_job_id = job_ptr->array_job_id,        \
-		       .array_task_id = job_ptr->array_task_id,      \
-	}
-#define ARRAY_TASK_STATE_KEY_SELECTED_STEP(selected_step)                    \
-	&(array_task_state_cached_t)                                         \
-	{                                                                    \
-		ONLY_DEBUG(.magic = MAGIC_ARRAY_TASK_STATE_CACHED, )         \
-			.job_id = ((selected_step->array_task_id < NO_VAL) ? \
-					   NO_VAL :                          \
-					   selected_step->step_id.job_id),   \
-		       .array_job_id =                                       \
-			       ((selected_step->array_task_id < NO_VAL) ?    \
-					selected_step->step_id.job_id :      \
-					0),                                  \
-		       .array_task_id = selected_step->array_task_id,        \
-	}
-
-#define ARRAY_TASK_STATE_MIMIC_RECORD(ats)                 \
-	&(const job_record_t)                              \
-	{                                                  \
-		.magic = JOB_MAGIC, .job_id = ats->job_id, \
-		.array_job_id = ats->array_job_id,         \
-		.array_task_id = ats->array_task_id,       \
-	}
-
 #define MAGIC_JOB_STATE_ARGS 0x0a0beeee
 
 typedef struct {
@@ -135,125 +49,8 @@
 	bool count_only;
 } job_state_args_t;
 
-#define MAGIC_CACHE_TABLE_STATE 0x1a0beffe
-typedef struct {
-	ONLY_DEBUG(int magic;) /* MAGIC_CACHE_TABLE_STATE */
-	int table_size;
-} cache_table_state_t;
-
-#define MAGIC_JOB_STATE_CACHED 0x1aa0affb
-typedef struct {
-	ONLY_DEBUG(int magic;) /* MAGIC_JOB_STATE_CACHED */
-	uint32_t job_id;
-	uint32_t job_state;
-	uint32_t het_job_id;
-	uint32_t array_job_id;
-	uint32_t array_task_id;
-	bitstr_t *task_id_bitmap;
-} job_state_cached_t;
-
-/* maps array job_id & task_id to non-array job_id */
-#define MAGIC_ARRAY_TASK_STATE_CACHED 0xb2a00fcb
-typedef struct {
-	ONLY_DEBUG(int magic;) /* MAGIC_ARRAY_TASK_STATE_CACHED */
-	uint32_t job_id;
-	uint32_t array_job_id;
-	uint32_t array_task_id;
-} array_task_state_cached_t;
-
-/* maps array job_id to circular linked list of non-array job_ids */
-#define MAGIC_ARRAY_JOB_STATE_CACHED 0xb21f0fca
-typedef struct {
-	ONLY_DEBUG(int magic;) /* MAGIC_ARRAY_JOB_STATE_CACHED */
-	uint32_t job_id;
-	uint32_t next_job_id; /* next job_id of member of array or 0 */
-} array_job_state_cached_t;
-
-/*
- * Caches job state (outside of job_mgr.c and JOB_LOCK).
- *
- * Hash: job_id
- * entry type: job_state_cached_t
- *	Includes all Job ID info required to identify each job.
- *	1 job per entry.
- * state type: cache_table_state_t
- *	Maintains table_size for number of jobs reserved in hashtable
- */
-static xahash_table_t *cache_table = NULL;
-/*
- * Caches linked list of Array job's job_id to allow finding every job member of
- * an array job.
- *
- * Hash: job_id
- * entry type: job_state_cached_t
- *	Includes all Job ID info required to identify each job.
- *	1 job per entry.
- * state type: cache_table_state_t
- *	Maintains table_size for number of jobs reserved in hashtable
- */
-static xahash_table_t *array_job_cache_table = NULL;
-/*
- * Caches mapping of array_job_id & array_task_id to job_id of every member of
- * an array job.
- *
- * Hash: array_job_id & array_task_id
- * entry type: array_task_state_cached_t
- * state type: cache_table_state_t
- *	Maintains table_size for number of jobs reserved in hashtable
- */
-static xahash_table_t *array_task_cache_table = NULL;
-static pthread_rwlock_t cache_lock = CACHE_LOCK_INIT;
-
-#ifndef NDEBUG
-
-#define DEF_DEBUG_TIMER DEF_TIMERS
-#define START_DEBUG_TIMER START_TIMER
-#define END_DEBUG_TIMER END_TIMER2(__func__)
-
-#define T(x) { x, XSTRINGIFY(x) }
-static const struct {
-	uint32_t flag;
-	char *string;
-} job_flags[] = {
-	T(JOB_LAUNCH_FAILED),
-	T(JOB_REQUEUE),
-	T(JOB_REQUEUE_HOLD),
-	T(JOB_SPECIAL_EXIT),
-	T(JOB_RESIZING),
-	T(JOB_CONFIGURING),
-	T(JOB_COMPLETING),
-	T(JOB_STOPPED),
-	T(JOB_RECONFIG_FAIL),
-	T(JOB_POWER_UP_NODE),
-	T(JOB_REVOKED),
-	T(JOB_REQUEUE_FED),
-	T(JOB_RESV_DEL_HOLD),
-	T(JOB_SIGNALING),
-	T(JOB_STAGE_OUT),
-	T(JOB_EXPEDITING),
-};
-
-static void _check_job_state(const uint32_t state)
-{
-	uint32_t flags;
-
-	if (!(slurm_conf.debug_flags & DEBUG_FLAG_TRACE_JOBS))
-		return;
-
-	flags = (state & JOB_STATE_FLAGS);
-
-	xassert((state & JOB_STATE_BASE) < JOB_END);
-
-	for (int i = 0; i < ARRAY_SIZE(job_flags); i++)
-		if ((flags & job_flags[i].flag) == job_flags[i].flag)
-			flags &= ~(job_flags[i].flag);
-
-	/* catch any bits that are not known flags */
-	xassert(!flags);
-}
-
 static void _log_job_state_change(const job_record_t *job_ptr,
-				  const uint32_t new_state)
+				  const uint32_t new_state, const char *caller)
 {
 	char *before_str, *after_str;
 
@@ -263,57 +60,32 @@
 	before_str = job_state_string_complete(job_ptr->job_state);
 	after_str = job_state_string_complete(new_state);
 
-	if (job_ptr->job_state == new_state)
-		log_flag(TRACE_JOBS, "%s: [%pJ] no-op change state: %s",
-			 __func__, job_ptr, before_str);
-	else
+	if (job_ptr->job_state == new_state) {
+		if (get_log_level() >= LOG_LEVEL_DEBUG4)
+			log_flag(TRACE_JOBS, "%s: [%pJ] no-op change state: %s",
+				 caller, job_ptr, before_str);
+	} else {
 		log_flag(TRACE_JOBS, "%s: [%pJ] change state: %s -> %s",
-			 __func__, job_ptr, before_str, after_str);
+			 caller, job_ptr, before_str, after_str);
+	}
 
 	xfree(before_str);
 	xfree(after_str);
 }
 
-#else /* NDEBUG */
-
-/*
- * Replace magic/sanity checks with no-ops that reference args to avoid compile
- * warnings.
- */
-
-#define _check_job_state(state) {(void) state;}
-#define _log_job_state_change(job_ptr, new_state) \
-	{(void) job_ptr; (void) new_state;}
-#define DEF_DEBUG_TIMER
-#define START_DEBUG_TIMER
-#define END_DEBUG_TIMER
-
-#endif /* NDEBUG */
-
-#define _log_array_job_chain(js, caller, fmt, ...) {}
-#define _check_all_jobs(compare_job_ptrs) {}
-#define _is_debug() (false)
-#define LOG(fmt, ...) log_flag(TRACE_JOBS, "%s: " fmt, __func__, ##__VA_ARGS__)
-#define _check_job_id(job_id_ptr) {(void) job_id_ptr;}
-#define _check_job_magic(js) {(void) js;}
-#define _check_array_job_magic(ajs) {(void) ajs;}
-#define _check_array_job_magic_links(job_id, array_job_id, should_be_linked) \
-	{(void) job_id; (void) array_job_id; (void) should_be_linked;}
-#define _check_array_task_magic(ats) {(void) ats;}
-#define _check_state_magic(state) {(void) state;}
-
-extern void job_state_set(job_record_t *job_ptr, uint32_t state)
+extern void slurm_job_state_set(job_record_t *job_ptr, uint32_t state,
+				const char *caller)
 {
 	xassert(verify_lock(JOB_LOCK, WRITE_LOCK));
-	_check_job_state(state);
-	_log_job_state_change(job_ptr, state);
+	_log_job_state_change(job_ptr, state, caller);
 
-	on_job_state_change(job_ptr, state);
+	slurm_on_job_state_change(job_ptr, state, caller);
 
 	job_ptr->job_state = state;
 }
 
-extern void job_state_set_flag(job_record_t *job_ptr, uint32_t flag)
+extern void slurm_job_state_set_flag(job_record_t *job_ptr, uint32_t flag,
+				     const char *caller)
 {
 	uint32_t job_state;
 
@@ -322,15 +94,15 @@
 	xassert(flag & JOB_STATE_FLAGS);
 
 	job_state = job_ptr->job_state | flag;
-	_check_job_state(job_state);
-	_log_job_state_change(job_ptr, job_state);
+	_log_job_state_change(job_ptr, job_state, caller);
 
-	on_job_state_change(job_ptr, job_state);
+	slurm_on_job_state_change(job_ptr, job_state, caller);
 
 	job_ptr->job_state = job_state;
 }
 
-extern void job_state_unset_flag(job_record_t *job_ptr, uint32_t flag)
+extern void slurm_job_state_unset_flag(job_record_t *job_ptr, uint32_t flag,
+				       const char *caller)
 {
 	uint32_t job_state;
 
@@ -339,10 +111,9 @@
 	xassert(flag & JOB_STATE_FLAGS);
 
 	job_state = job_ptr->job_state & ~flag;
-	_check_job_state(job_state);
-	_log_job_state_change(job_ptr, job_state);
+	_log_job_state_change(job_ptr, job_state, caller);
 
-	on_job_state_change(job_ptr, job_state);
+	slurm_on_job_state_change(job_ptr, job_state, caller);
 
 	job_ptr->job_state = job_state;
 }
@@ -424,228 +195,6 @@
 	}
 }
 
-static int _add_cache_job(job_state_args_t *args, const job_state_cached_t *js)
-{
-	job_state_response_job_t *rjob;
-
-	xassert(args->magic == MAGIC_JOB_STATE_ARGS);
-	xassert(js->magic == MAGIC_JOB_STATE_CACHED);
-
-	rjob = _append_job_state(args);
-
-	if (args->count_only)
-		return SLURM_SUCCESS;
-
-	if (!rjob) {
-		LOG("[%pJ] packing at %d/%d failed",
-		    JOB_STATE_MIMIC_RECORD(js), args->count, args->jobs_count);
-		return ERANGE;
-	}
-
-	if (_is_debug()) {
-		char *bitstr =
-			(js->task_id_bitmap ? bit_fmt_full(js->task_id_bitmap) :
-					      NULL);
-		const size_t bits =
-			(js->task_id_bitmap ? bit_size(js->task_id_bitmap) : 0);
-
-		LOG("[%pJ] packing JobId=%u ArrayJobId=%u ArrayTaskId=%u array_task_id_bitmap[%zu]=%s HetJobId=%u state:%s",
-		    JOB_STATE_MIMIC_RECORD(js), js->job_id, js->array_job_id,
-		    js->array_task_id, bits, (bitstr ? bitstr : ""),
-		    js->het_job_id, job_state_string(js->job_state));
-
-		(void) bits;
-		xfree(bitstr);
-	}
-
-	rjob->job_id = js->job_id;
-	rjob->array_job_id = js->array_job_id;
-	rjob->array_task_id = js->array_task_id;
-	if (js->task_id_bitmap)
-		rjob->array_task_id_bitmap = bit_copy(js->task_id_bitmap);
-	rjob->het_job_id = js->het_job_id;
-	rjob->state = js->job_state;
-
-	return SLURM_SUCCESS;
-}
-
-static xahash_foreach_control_t _foreach_cache_job(void *entry, void *state_ptr,
-						   void *arg)
-{
-	const job_state_cached_t *js = entry;
-	ONLY_DEBUG(cache_table_state_t *state = state_ptr;)
-	job_state_args_t *args = arg;
-
-	xassert(!state || (state->magic == MAGIC_CACHE_TABLE_STATE));
-	xassert(args->magic == MAGIC_JOB_STATE_ARGS);
-	xassert(js->magic == MAGIC_JOB_STATE_CACHED);
-
-	if (_add_cache_job(args, js))
-		return XAHASH_FOREACH_FAIL;
-
-	return XAHASH_FOREACH_CONT;
-}
-
-static void _find_job_state_cached_by_job_id(job_state_args_t *args,
-					     uint32_t job_id, bool resolve)
-{
-	const job_state_cached_t *js;
-
-	if (!(js = xahash_find_entry(cache_table, &job_id, sizeof(job_id)))) {
-		LOG("[JobId=%u] Unable to resolve job", job_id);
-		return;
-	}
-
-	_check_job_magic(js);
-
-	LOG("[%pJ] Resolved from JobId=%u", JOB_STATE_MIMIC_RECORD(js), job_id);
-
-	if (_add_cache_job(args, js))
-		return;
-
-	if (!resolve) {
-		LOG("[%pJ] Not fully resolving job", JOB_STATE_MIMIC_RECORD(js));
-		return;
-	}
-
-	if ((js->array_job_id > 0) && (js->array_job_id == js->job_id)) {
-		array_job_state_cached_t *ajs;
-
-		ajs = xahash_find_entry(array_job_cache_table,
-					&js->array_job_id,
-					sizeof(js->array_job_id));
-
-		_check_array_job_magic(ajs);
-
-		_log_array_job_chain(js, __func__,
-				     "Resolved %pJ with next: JobId=%u",
-				     ARRAY_JOB_STATE_MIMIC_RECORD(ajs),
-				     ajs->next_job_id);
-
-		while (ajs->next_job_id != js->array_job_id) {
-			const job_state_cached_t *next;
-
-			if ((next = xahash_find_entry(
-				     cache_table, &ajs->next_job_id,
-				     sizeof(ajs->next_job_id)))) {
-				LOG("[%pJ] Resolved to %pJ via %pJ",
-				    JOB_STATE_MIMIC_RECORD(js),
-				    JOB_STATE_MIMIC_RECORD(next),
-				    ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
-				if (_add_cache_job(args, next))
-					break;
-			} else {
-				fatal_abort("Unable to resolve next_job_id");
-			}
-
-			ajs = xahash_find_entry(array_job_cache_table,
-						&ajs->next_job_id,
-						sizeof(ajs->next_job_id));
-		}
-
-		xassert(ajs->next_job_id == js->array_job_id);
-	} else if (js->het_job_id == js->job_id) {
-		for (uint32_t i = 1; i < MAX_JOB_ID; i++) {
-			const job_state_cached_t *hjs;
-			uint32_t het_job_id = js->het_job_id + i;
-
-			if ((hjs = xahash_find_entry(cache_table, &het_job_id,
-						     sizeof(het_job_id))) &&
-			    (hjs->het_job_id == js->het_job_id)) {
-				LOG("[%pJ] Resolved HetJobId=%u+%u to %pJ",
-				    JOB_STATE_MIMIC_RECORD(js), job_id, i,
-				    JOB_STATE_MIMIC_RECORD(hjs));
-				if (_add_cache_job(args, hjs))
-					break;
-			} else {
-				/*
-				 * Next job not found or not part of the HetJob
-				 */
-				break;
-			}
-		}
-	} else {
-		LOG("[%pJ] Nothing else to resolve",
-		    JOB_STATE_MIMIC_RECORD(js));
-	}
-}
-
-static void _find_job_state_cached_by_id(job_state_args_t *args,
-					 const slurm_selected_step_t *filter)
-{
-	char *filter_str = NULL;
-
-	if (_is_debug())
-		(void) fmt_job_id_string((slurm_selected_step_t *) filter,
-					 &filter_str);
-
-	if (!filter->step_id.job_id) {
-		/* 0 is never a valid job so just return now */
-		goto cleanup;
-	} else if (filter->step_id.job_id == NO_VAL) {
-		/* walk all jobs */
-		(void) xahash_foreach_entry(cache_table, _foreach_cache_job,
-					    args);
-		goto cleanup;
-	}
-
-	xassert(!((filter->array_task_id != NO_VAL) &&
-		  (filter->het_job_offset != NO_VAL)));
-
-	if (filter->array_task_id != NO_VAL) {
-		const array_task_state_cached_t *ats;
-
-		ats = xahash_find_entry(
-			array_task_cache_table,
-			ARRAY_TASK_STATE_KEY_SELECTED_STEP(filter),
-			ARRAY_TASK_STATE_KEY_BYTES);
-
-		if (ats) {
-			slurm_selected_step_t aj_filter =
-				SLURM_SELECTED_STEP_INITIALIZER;
-			aj_filter.step_id.job_id = ats->job_id;
-
-			_check_array_task_magic(ats);
-			LOG("[%pJ] Resolved from %s",
-			    ARRAY_TASK_STATE_MIMIC_RECORD(ats), filter_str);
-
-			_find_job_state_cached_by_id(args, &aj_filter);
-		} else {
-			LOG("[%s] Unable to resolve job", filter_str);
-		}
-	} else if (filter->het_job_offset != NO_VAL) {
-		_find_job_state_cached_by_job_id(args,
-						 (filter->step_id.job_id +
-						  filter->het_job_offset),
-						 false);
-	} else {
-		_find_job_state_cached_by_job_id(args, filter->step_id.job_id,
-						 true);
-	}
-
-cleanup:
-	xfree(filter_str);
-}
-
-static void _dump_job_state_cached(job_state_args_t *args,
-				   const uint32_t filter_jobs_count,
-				   const slurm_selected_step_t *filter_jobs_ptr)
-{
-	xassert(args->magic == MAGIC_JOB_STATE_ARGS);
-
-	slurm_rwlock_rdlock(&cache_lock);
-
-	if (!filter_jobs_count) {
-		(void) xahash_foreach_entry(cache_table, _foreach_cache_job,
-					    args);
-	} else {
-		for (int i = 0; !args->rc && (i < filter_jobs_count); i++)
-			_find_job_state_cached_by_id(args, &filter_jobs_ptr[i]);
-	}
-
-	slurm_rwlock_unlock(&cache_lock);
-}
-
 extern int dump_job_state(const uint32_t filter_jobs_count,
 			  const slurm_selected_step_t *filter_jobs_ptr,
 			  uint32_t *jobs_count_ptr,
@@ -656,27 +205,14 @@
 		.magic = MAGIC_JOB_STATE_ARGS,
 		.count_only = true,
 	};
-	/*
-	 * In order to avoid the time cost, we are not taking the cache_lock
-	 * to check if the cache_table pointer is !NULL as the cost of hitting
-	 * an invalid cached state here is very minor.
-	 */
-	bool use_cache = cache_table;
 
-	if (!use_cache)
-		lock_slurmctld(job_read_lock);
+	lock_slurmctld(job_read_lock);
 
 	/*
 	 * Loop once to grab the job count and then allocate the job array and
 	 * then populate the array.
 	 */
-
-	if (use_cache)
-		_dump_job_state_cached(&args, filter_jobs_count,
-				       filter_jobs_ptr);
-	else
-		_dump_job_state_locked(&args, filter_jobs_count,
-				       filter_jobs_ptr);
+	_dump_job_state_locked(&args, filter_jobs_count, filter_jobs_ptr);
 
 	if (args.count > 0) {
 		if (!try_xrecalloc(args.jobs, args.count, sizeof(*args.jobs))) {
@@ -690,737 +226,20 @@
 		args.count_only = false;
 		args.count = 0;
 
-		if (use_cache)
-			_dump_job_state_cached(&args, filter_jobs_count,
-					       filter_jobs_ptr);
-		else
-			_dump_job_state_locked(&args, filter_jobs_count,
-					       filter_jobs_ptr);
+		_dump_job_state_locked(&args, filter_jobs_count,
+				       filter_jobs_ptr);
 	}
 
 	*jobs_pptr = args.jobs;
 	*jobs_count_ptr = args.jobs_count;
 cleanup:
-	if (!use_cache)
-		unlock_slurmctld(job_read_lock);
+	unlock_slurmctld(job_read_lock);
 
 	return args.rc;
 }
 
-static void _sync_job_task_id_bitmap(const job_record_t *job_ptr,
-				     job_state_cached_t *js)
+extern void slurm_on_job_state_change(job_record_t *job_ptr, uint32_t new_state,
+				      const char *caller)
 {
-	if (!job_ptr->array_recs) {
-		if (job_ptr->array_task_id == NO_VAL) {
-			/* before array job is split */
-			LOG("[%pJ] ignoring array job without array_recs",
-			    JOB_STATE_MIMIC_RECORD(js));
-			return;
-		}
-
-		/* conversion from meta job to normal job */
-		xassert(!js->task_id_bitmap || (js->array_task_id == NO_VAL));
-
-		if (_is_debug() && js->task_id_bitmap) {
-			char *before = bit_fmt_full(js->task_id_bitmap);
-			LOG("[%pJ] releasing array task_id_bitmap[%lu]: %s",
-			    JOB_STATE_MIMIC_RECORD(js),
-			    bit_size(js->task_id_bitmap), before);
-			xfree(before);
-		}
-
-		/* bitmap removed by state change or was never there */
-		FREE_NULL_BITMAP(js->task_id_bitmap);
-
-		return;
-	}
-
-	if (!job_ptr->array_recs->task_id_bitmap) {
-		uint32_t task_cnt = job_ptr->array_recs->pend_run_tasks;
-
-		if ((job_ptr->job_state != JOB_PENDING) || !task_cnt) {
-			if (_is_debug() &&
-			    (job_ptr->job_state != JOB_PENDING) &&
-			    js->task_id_bitmap) {
-				char *before = bit_fmt_full(js->task_id_bitmap);
-				LOG("[%pJ] job no longer pending: releasing array task_id_bitmap[%lu]: %s",
-				    JOB_STATE_MIMIC_RECORD(js),
-				    bit_size(js->task_id_bitmap), before);
-				xfree(before);
-			}
-
-			if (_is_debug() && !task_cnt && js->task_id_bitmap)
-				LOG("[%pJ] pending array job without pending task count",
-				    JOB_STATE_MIMIC_RECORD(js));
-
-			/* bitmap removed by state change or was never there */
-			FREE_NULL_BITMAP(js->task_id_bitmap);
-
-			return;
-		}
-
-		if (js->task_id_bitmap &&
-		    (bit_size(js->task_id_bitmap) != task_cnt)) {
-			LOG("[%pJ] array job task_id_bitmap changed from %lu to %u",
-			    JOB_STATE_MIMIC_RECORD(js),
-			    (js->task_id_bitmap ? bit_size(js->task_id_bitmap) :
-						  0),
-			    task_cnt);
-			FREE_NULL_BITMAP(js->task_id_bitmap);
-		}
-
-		if (!js->task_id_bitmap) {
-			LOG("[%pJ] mimicking array without task_id_bitmap with new bitmap[%u]",
-			    JOB_STATE_MIMIC_RECORD(js), task_cnt);
-			js->task_id_bitmap = bit_alloc(task_cnt);
-		}
-
-		xassert(js->task_id_bitmap);
-		xassert(bit_size(js->task_id_bitmap) == task_cnt);
-
-		bit_set_all(js->task_id_bitmap);
-
-		if (_is_debug()) {
-			char *map = bit_fmt_full(js->task_id_bitmap);
-			LOG("[%pJ] mimicking array without bitmap as task_id_bitmap[%lu]: %s",
-			    JOB_STATE_MIMIC_RECORD(js),
-			    bit_size(js->task_id_bitmap), map);
-			xfree(map);
-		}
-
-		return;
-	}
-
-	if (js->task_id_bitmap &&
-	    (bit_size(js->task_id_bitmap) ==
-	     bit_size(job_ptr->array_recs->task_id_bitmap))) {
-		/* resync all bits */
-
-		if (_is_debug()) {
-			char *before = bit_fmt_full(js->task_id_bitmap);
-			char *after = bit_fmt_full(job_ptr->array_recs
-							   ->task_id_bitmap);
-			LOG("[%pJ] updating array task_id_bitmap[%lu]: %s -> %s",
-			    JOB_STATE_MIMIC_RECORD(js),
-			    bit_size(job_ptr->array_recs->task_id_bitmap),
-			    before, after);
-			xfree(before);
-			xfree(after);
-		}
-
-		bit_copybits(js->task_id_bitmap,
-			     job_ptr->array_recs->task_id_bitmap);
-	} else {
-		/* new bitmap or bit count changed */
-
-		if (_is_debug()) {
-			char *before =
-				(js->task_id_bitmap ?
-					 bit_fmt_full(js->task_id_bitmap) :
-					 NULL);
-			char *after =
-				(job_ptr->array_recs->task_id_bitmap ?
-					 bit_fmt_full(
-						 job_ptr->array_recs
-							 ->task_id_bitmap) :
-					 NULL);
-			LOG("[%pJ] new array task_id_bitmap[%lu]: %s -> %s",
-			    JOB_STATE_MIMIC_RECORD(js),
-			    (job_ptr->array_recs->task_id_bitmap ?
-				     bit_size(job_ptr->array_recs
-						      ->task_id_bitmap) :
-				     0),
-			    (before ? before : "∅"), (after ? after : "∅"));
-			xfree(before);
-			xfree(after);
-		}
-
-		FREE_NULL_BITMAP(js->task_id_bitmap);
-		js->task_id_bitmap =
-			bit_copy(job_ptr->array_recs->task_id_bitmap);
-	}
-}
-
-static void _link_array_job(const job_record_t *job_ptr, job_state_cached_t *js)
-{
-	array_task_state_cached_t *ats;
-	array_job_state_cached_t *ajs, *meta;
-	const uint32_t job_id = job_ptr->job_id;
-
-	xassert(!job_ptr->het_job_id);
-	xassert(job_ptr->array_job_id > 0);
-	xassert(js->array_job_id > 0);
-	xassert(js->array_job_id == job_ptr->array_job_id);
-	xassert(js->array_task_id == job_ptr->array_task_id);
-
-	ats = xahash_insert_entry(array_task_cache_table,
-				  ARRAY_TASK_STATE_KEY_JOB_PTR(job_ptr),
-				  ARRAY_TASK_STATE_KEY_BYTES);
-	_check_array_task_magic(ats);
-	xassert(ats->array_job_id == job_ptr->array_job_id);
-	xassert(ats->array_task_id == job_ptr->array_task_id);
-
-	ajs = xahash_insert_entry(array_job_cache_table, &job_id,
-				  sizeof(job_id));
-	_check_array_job_magic(ajs);
-
-	if (ajs->next_job_id != job_id) {
-		_log_array_job_chain(
-			js, __func__,
-			"skipping already linked array jobs next:JobId=%u",
-			ajs->next_job_id);
-		_check_array_job_magic_links(job_ptr->job_id,
-					     job_ptr->array_job_id, true);
-		return;
-	}
-
-	/* Newly inserted jobs only link to themselves */
-	xassert(ajs->next_job_id == job_id);
-	xassert(ajs->job_id == job_id);
-
-	/* need to add this job into linked list of jobs for array */
-	meta = xahash_insert_entry(array_job_cache_table,
-				   &job_ptr->array_job_id,
-				   sizeof(job_ptr->array_job_id));
-	_check_array_job_magic(meta);
-
-	if (job_ptr->job_id == job_ptr->array_job_id) {
-		/*
-		 * Can't link meta job to itself, so find if another job already
-		 * linked to meta and created a stub for the meta job.
-		 */
-
-		if (meta->next_job_id != meta->job_id) {
-			_log_array_job_chain(js, __func__,
-				"skipping already linked array meta job");
-			return;
-		} else {
-			_log_array_job_chain(js, __func__,
-				"skipping linking singular array meta job");
-			return;
-		}
-	}
-
-	xassert(meta != ajs);
-
-	ajs->next_job_id = meta->next_job_id;
-	meta->next_job_id = job_id;
-
-	_check_array_job_magic(ajs);
-	_check_array_job_magic(meta);
-	_log_array_job_chain(js, __func__, "linked to %pJ",
-			     ARRAY_JOB_STATE_MIMIC_RECORD(meta));
-	_check_array_job_magic_links(job_ptr->job_id, job_ptr->array_job_id,
-				     true);
-}
-
-/* remove ajs from linked list of array jobs */
-static void _unlink_array_job(const job_record_t *job_ptr,
-			      job_state_cached_t *js,
-			      array_job_state_cached_t *ajs)
-{
-	uint32_t job_id = js->job_id;
-	uint32_t array_job_id = js->array_job_id;
-	array_job_state_cached_t *next = NULL;
-
-	if (ajs->next_job_id == job_id) {
-		_log_array_job_chain(xahash_find_entry(cache_table, &job_id,
-						       sizeof(job_id)),
-				     __func__,
-				     "removing singular chain for %pJ",
-				     job_ptr);
-
-		if (!xahash_free_entry(array_job_cache_table, &job_id,
-				       sizeof(job_id)))
-			fatal_abort("Unable to remove %pJ after just finding it",
-				    ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
-		return;
-	} else if (js->array_job_id == js->job_id) {
-		_log_array_job_chain(js, __func__,
-				     "skipping removal of meta which would orphan JobId=%u",
-				     ajs->next_job_id);
-		return;
-	}
-
-	next = ajs;
-
-	while (next->next_job_id != job_id) {
-		next = xahash_find_entry(array_job_cache_table,
-					 &next->next_job_id,
-					 sizeof(next->next_job_id));
-		_check_array_job_magic(next);
-	}
-
-	xassert(next->next_job_id == job_id);
-
-	_log_array_job_chain(js, __func__, "removing from link chain");
-
-	next->next_job_id = ajs->next_job_id;
-	ajs->next_job_id = job_id;
-
-	_log_array_job_chain(js, __func__,
-			     "array job chain removed for %pJ removal",
-			     job_ptr);
-
-	if (array_job_id > 0) {
-		_log_array_job_chain(xahash_find_entry(cache_table,
-						       &array_job_id,
-						       sizeof(array_job_id)),
-				     __func__,
-				     "removed %pJ from meta link chain",
-				     job_ptr);
-		_check_array_job_magic_links(job_id, array_job_id, false);
-	}
-
-	_check_array_job_magic(ajs);
-	_check_array_job_magic(next);
-
-	if (!xahash_free_entry(array_job_cache_table, &job_id, sizeof(job_id)))
-		fatal_abort("Unable to remove %pJ after just finding it",
-			    ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
-
-	/* check for meta that only exists for this job */
-	if (array_job_id && (next->job_id == next->next_job_id)) {
-		job_state_cached_t *meta_js =
-			xahash_find_entry(cache_table, &array_job_id,
-					  sizeof(array_job_id));
-
-		/* should only be the meta in chain left */
-		xassert(next->job_id == array_job_id);
-		xassert(next->next_job_id == array_job_id);
-
-		if (meta_js) {
-			_log_array_job_chain(meta_js, __func__,
-					     "keeping meta job in chain after %pJ removal",
-					     job_ptr);
-		} else {
-			/*
-			 * if there is no JS for the meta job, then this chain
-			 * is only a placeholder that needs to be removed
-			 */
-
-			_log_array_job_chain(js, __func__,
-					     "removing meta job placeholder in chain after %pJ removal",
-					     job_ptr);
-
-			/* prune meta job that only exist to avoid orphans */
-			if (!xahash_free_entry(array_job_cache_table,
-					       &array_job_id,
-					       sizeof(array_job_id)))
-				fatal_abort("[JobId=%u] Unable to remove array meta job placeholder link",
-					    array_job_id);
-		}
-	}
-}
-
-static void _on_array_job_removal(const job_record_t *job_ptr,
-				  job_state_cached_t *js)
-{
-	array_job_state_cached_t *ajs;
-
-	/*
-	 * Need to use the cached array_task_id and not the potentially changed
-	 * job_ptr->array_task_id to remove the task cache
-	 */
-
-	xassert(js);
-	xassert(js->job_id == job_ptr->job_id);
-
-	if ((ajs = xahash_find_entry(array_job_cache_table, &js->job_id,
-				     sizeof(js->job_id)))) {
-		_check_array_job_magic(ajs);
-		xassert(ajs->job_id == job_ptr->job_id);
-
-		_check_array_job_magic_links(js->job_id, js->array_job_id,
-					     true);
-		_unlink_array_job(job_ptr, js, ajs);
-	} else {
-		xassert(!js->array_job_id);
-	}
-
-	if (js->array_job_id > 0) {
-		if (!xahash_free_entry(
-			    array_task_cache_table,
-			    ARRAY_TASK_STATE_KEY_JOB_ID(js->array_job_id,
-							js->array_task_id),
-			    ARRAY_TASK_STATE_KEY_BYTES))
-			fatal_abort("[%pJ] array task cache not found",
-				    JOB_STATE_MIMIC_RECORD(js));
-
-		LOG("[%pJ] array task cache removed for %pJ",
-		    JOB_STATE_MIMIC_RECORD(js), job_ptr);
-	}
-
-	if (_is_debug()) {
-		array_task_state_cached_t *ats;
-
-		ats = xahash_find_entry(array_task_cache_table,
-					ARRAY_TASK_STATE_KEY_JOB_ID(
-						js->job_id, js->array_task_id),
-					ARRAY_TASK_STATE_KEY_BYTES);
-		ajs = xahash_find_entry(array_job_cache_table, &job_ptr->job_id,
-					sizeof(job_ptr->job_id));
-
-		if (ats)
-			fatal_abort("found array task when there should not be one: %pJ",
-				    ARRAY_TASK_STATE_MIMIC_RECORD(ats));
-		if (ajs && (js->job_id != js->array_job_id))
-			fatal_abort("found array job link when there should not be one: %pJ",
-				    ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
-	}
-}
-
-static void _on_array_job_change(const job_record_t *job_ptr,
-				 job_state_cached_t *js)
-{
-	xassert(!js->het_job_id);
-
-	/*
-	 * Array IDs can change during meta array job's life.
-	 * This could cause any of the existing array array_job_id/task_id ->
-	 * job_id or array_job chain to be incorrect and they need to be
-	 * rebuilt.
-	 */
-
-	if ((js->array_task_id != job_ptr->array_task_id) ||
-	    (js->array_job_id != job_ptr->array_job_id)) {
-		/* task id should only ever change from meta to a numeric */
-		xassert(js->array_task_id == NO_VAL);
-		xassert(!js->het_job_id);
-		xassert(!job_ptr->het_job_id);
-		xassert(job_ptr->array_job_id > 0);
-		LOG("[%pJ] changed array_task_id=%u->%u array_job_id=%u->%u",
-		    JOB_STATE_MIMIC_RECORD(js), js->array_task_id,
-		    job_ptr->array_task_id, js->array_job_id,
-		    job_ptr->array_job_id);
-
-		/* Remove task cache but leave the meta job link intact */
-		if (js->array_job_id &&
-		    !xahash_free_entry(
-			    array_task_cache_table,
-			    ARRAY_TASK_STATE_KEY_JOB_ID(js->array_job_id,
-							js->array_task_id),
-			    ARRAY_TASK_STATE_KEY_BYTES))
-			fatal_abort("[%pJ] array task cache not found",
-				    JOB_STATE_MIMIC_RECORD(js));
-	}
-
-	xassert(js->job_id == job_ptr->job_id);
-	js->array_task_id = job_ptr->array_task_id;
-	js->array_job_id = job_ptr->array_job_id;
-
-	_sync_job_task_id_bitmap(job_ptr, js);
-	_link_array_job(job_ptr, js);
-}
-
-extern void on_job_state_change(job_record_t *job_ptr, uint32_t new_state)
-{
-	DEF_DEBUG_TIMER;
-	job_state_cached_t *js = NULL;
-	const uint32_t job_id = job_ptr->job_id;
-
-	/*
-	 * In order to avoid the time cost, we are not taking the cache_lock
-	 * to check if the cache_table pointer is !NULL as the cost of hitting
-	 * an invalid cached state here is very minor.
-	 */
-	if (!cache_table)
-		return;
-
-	if (!job_id)
-		return;
-
-	START_DEBUG_TIMER;
-
-	xassert(job_ptr->magic == JOB_MAGIC);
-
-	slurm_rwlock_wrlock(&cache_lock);
-
-	_check_all_jobs(false);
-
-	if (new_state == NO_VAL) {
-		js = xahash_find_entry(cache_table, &job_id, sizeof(job_id));
-
-		if (js && (js->array_job_id > 0))
-			_on_array_job_removal(job_ptr, js);
-
-		if (xahash_free_entry(cache_table, &job_id, sizeof(job_id)))
-			LOG("[%pJ] job state cache removed", job_ptr);
-		else
-			LOG("[%pJ] job state cache not found", job_ptr);
-
-		_check_all_jobs(false);
-
-		slurm_rwlock_unlock(&cache_lock);
-		END_DEBUG_TIMER;
-		return;
-	}
-
-	js = xahash_insert_entry(cache_table, &job_id, sizeof(job_id));
-	xassert(js->magic == MAGIC_JOB_STATE_CACHED);
-
-	if (_is_debug() && (js->job_state != new_state)) {
-		char *before = job_state_string_complete(js->job_state);
-		char *after = job_state_string_complete(job_ptr->job_state);
-
-		LOG("[%pJ] changed state: %s -> %s",
-		    JOB_STATE_MIMIC_RECORD(js), before, after);
-
-		xfree(before);
-		xfree(after);
-	}
-
-	js->job_state = new_state;
-
-	if (job_ptr->array_job_id || js->array_job_id)
-		_on_array_job_change(job_ptr, js);
-
-	/*
-	 * A het job is added to the job state cache after each component is
-	 * created. Because this cache exists outside of the job read/write
-	 * locks, that means a job state cache query can happen while a het
-	 * job is being created, and het job id may not be set for each
-	 * component yet. In that case, check that the het job state cache has
-	 * not been initialized yet.
-	 */
-
-	if (_is_debug() && (js->het_job_id != job_ptr->het_job_id)) {
-		xassert(!js->het_job_id);
-		xassert(js->array_task_id == NO_VAL);
-		xassert(!js->array_job_id);
-		xassert(job_ptr->array_task_id == NO_VAL);
-		xassert(!job_ptr->array_job_id);
-		LOG("[%pJ] changed het_job_id=%u->%u",
-		    JOB_STATE_MIMIC_RECORD(js), js->het_job_id,
-		    job_ptr->het_job_id);
-	}
-
-	js->het_job_id = job_ptr->het_job_id;
-
-	_check_all_jobs(false);
-
-	slurm_rwlock_unlock(&cache_lock);
-	END_DEBUG_TIMER;
-}
-
-static xahash_hash_t _hash(const void *key, const size_t key_bytes,
-			   void *state_ptr)
-{
-	cache_table_state_t *state = state_ptr;
-	const uint32_t *job_id_ptr = key;
-
-	_check_state_magic(state);
-	_check_job_id(job_id_ptr);
-	xassert(sizeof(*job_id_ptr) == key_bytes);
-
-	return *job_id_ptr % state->table_size;
-}
-
-static bool _match(void *entry, const void *key, const size_t key_bytes,
-		   void *state_ptr)
-{
-	job_state_cached_t *js = entry;
-	cache_table_state_t *state = state_ptr;
-	const uint32_t *job_id_ptr = key;
-
-	_check_job_magic(js);
-	_check_state_magic(state);
-	_check_job_id(job_id_ptr);
-	xassert(sizeof(*job_id_ptr) == key_bytes);
-
-	return (js->job_id == *job_id_ptr);
-}
-
-static void _on_insert(void *entry, const void *key, const size_t key_bytes,
-		       void *state_ptr)
-{
-	job_state_cached_t *js = entry;
-	const uint32_t *job_id_ptr = key;
-	cache_table_state_t *state = state_ptr;
-
-	_check_state_magic(state);
-	_check_job_id(job_id_ptr);
-	xassert(sizeof(*job_id_ptr) == key_bytes);
-
-	*js = (job_state_cached_t) {
-		ONLY_DEBUG(.magic = MAGIC_JOB_STATE_CACHED,)
-		.job_id = *job_id_ptr,
-		.job_state = NO_VAL,
-		.array_task_id = NO_VAL,
-	};
-
-	LOG("%pJ inserted", JOB_STATE_MIMIC_RECORD(js));
-	_check_job_magic(js);
-}
-
-static void _on_free(void *ptr, void *state_ptr)
-{
-	job_state_cached_t *js = ptr;
-	cache_table_state_t *state = state_ptr;
-
-	_check_job_magic(js);
-	_check_state_magic(state);
-
-	LOG("%pJ releasing", JOB_STATE_MIMIC_RECORD(js));
-
-	FREE_NULL_BITMAP(js->task_id_bitmap);
-	ONLY_DEBUG(memset(js, 0, sizeof(*js)));
-	ONLY_DEBUG(js->magic = ~MAGIC_JOB_STATE_CACHED);
-}
-
-static bool _array_job_match(void *entry, const void *key,
-			     const size_t key_bytes, void *state_ptr)
-{
-	array_job_state_cached_t *ajs = entry;
-	cache_table_state_t *state = state_ptr;
-	const uint32_t *job_id_ptr = key;
-
-	_check_state_magic(state);
-	_check_array_job_magic(ajs);
-	_check_job_id(job_id_ptr);
-	xassert(sizeof(*job_id_ptr) == key_bytes);
-
-	return (ajs->job_id == *job_id_ptr);
-}
-
-static void _array_job_on_insert(void *entry, const void *key,
-				 const size_t key_bytes, void *state_ptr)
-{
-	array_job_state_cached_t *ajs = entry;
-	cache_table_state_t *state = state_ptr;
-	const uint32_t *job_id_ptr = key;
-
-	_check_state_magic(state);
-	_check_job_id(job_id_ptr);
-	xassert(sizeof(*job_id_ptr) == key_bytes);
-
-	*ajs = (array_job_state_cached_t) {
-		ONLY_DEBUG(.magic = MAGIC_ARRAY_JOB_STATE_CACHED,)
-		.job_id = *job_id_ptr,
-		.next_job_id = *job_id_ptr,
-	};
-
-	LOG("%pJ inserted", ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
-	_check_array_job_magic(ajs);
-}
-
-static void _array_job_on_free(void *ptr, void *state_ptr)
-{
-	array_job_state_cached_t *ajs = ptr;
-	cache_table_state_t *state = state_ptr;
-
-	_check_state_magic(state);
-	_check_array_job_magic(ajs);
-
-	LOG("%pJ released", ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
-	ONLY_DEBUG(memset(ajs, 0, sizeof(*ajs)));
-	ONLY_DEBUG(ajs->magic = ~MAGIC_ARRAY_JOB_STATE_CACHED);
-}
-
-static xahash_hash_t _array_task_hash(const void *key, const size_t key_bytes,
-				      void *state_ptr)
-{
-	uint64_t seed;
-	cache_table_state_t *state = state_ptr;
-	const array_task_state_cached_t *ats_key = key;
-
-	_check_state_magic(state);
-	_check_array_task_magic(ats_key);
-
-	seed = ((uint64_t) ats_key->array_job_id) << 32;
-	seed |= ats_key->array_task_id;
-
-	return seed % state->table_size;
-}
-
-static bool _array_task_match(void *entry, const void *key,
-			      const size_t key_bytes, void *state_ptr)
-{
-	array_task_state_cached_t *ats = entry;
-	cache_table_state_t *state = state_ptr;
-	const array_task_state_cached_t *ats_key = key;
-
-	_check_state_magic(state);
-	_check_array_task_magic(ats);
-	_check_array_task_magic(ats_key);
-	xassert(sizeof(*ats_key) == key_bytes);
-
-	/* treat NO_VAL and INFINITE as * for arrays */
-	if ((ats_key->array_task_id < NO_VAL) &&
-	    (ats->array_task_id != ats_key->array_task_id))
-		return false;
-
-	return (ats->array_job_id == ats_key->array_job_id);
-}
-
-static void _array_task_on_insert(void *entry, const void *key,
-				  const size_t key_bytes, void *state_ptr)
-{
-	array_task_state_cached_t *ats = entry;
-	const array_task_state_cached_t *ats_key = key;
-	cache_table_state_t *state = state_ptr;
-
-	_check_state_magic(state);
-	_check_array_task_magic(ats_key);
-	xassert(sizeof(*ats_key) == key_bytes);
-
-	*ats = *ats_key;
-
-	LOG("%pJ inserted", ARRAY_TASK_STATE_MIMIC_RECORD(ats));
-	_check_array_task_magic(ats);
-}
-
-static void _array_task_on_free(void *ptr, void *state_ptr)
-{
-	array_task_state_cached_t *ats = ptr;
-	cache_table_state_t *state = state_ptr;
-
-	_check_state_magic(state);
-	_check_array_task_magic(ats);
-
-	LOG("%pJ released", ARRAY_TASK_STATE_MIMIC_RECORD(ats));
-	ONLY_DEBUG(memset(ats, 0, sizeof(*ats)));
-	ONLY_DEBUG(ats->magic = ~MAGIC_ARRAY_TASK_STATE_CACHED);
-}
-
-extern void setup_job_state_hash(int new_hash_table_size)
-{
-	const cache_table_state_t nstate = (cache_table_state_t) {
-		ONLY_DEBUG(.magic = MAGIC_CACHE_TABLE_STATE,)
-		.table_size = new_hash_table_size,
-	};
-
-	LOG("Job state cache active with %d jobs in hash tables",
-	    new_hash_table_size);
-
-	slurm_rwlock_wrlock(&cache_lock);
-
-	xassert(!cache_table);
-	cache_table = xahash_new_table(_hash, _match, _on_insert, _on_free,
-				       sizeof(cache_table_state_t),
-				       sizeof(job_state_cached_t),
-				       new_hash_table_size);
-	*((cache_table_state_t *) xahash_get_state_ptr(cache_table)) = nstate;
-
-	xassert(!array_job_cache_table);
-	array_job_cache_table =
-		xahash_new_table(_hash, _array_job_match,
-				 _array_job_on_insert, _array_job_on_free,
-				 sizeof(cache_table_state_t),
-				 sizeof(array_job_state_cached_t),
-				 new_hash_table_size);
-	*((cache_table_state_t *) xahash_get_state_ptr(array_job_cache_table)) =
-		nstate;
-
-	xassert(!array_task_cache_table);
-	array_task_cache_table =
-		xahash_new_table(_array_task_hash, _array_task_match,
-				 _array_task_on_insert, _array_task_on_free,
-				 sizeof(cache_table_state_t),
-				 sizeof(array_task_state_cached_t),
-				 new_hash_table_size);
-	*((cache_table_state_t *)
-		  xahash_get_state_ptr(array_task_cache_table)) = nstate;
-
-	slurm_rwlock_unlock(&cache_lock);
+	_log_job_state_change(job_ptr, new_state, caller);
 }
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index b36586b..c0ba8ed 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -552,38 +552,58 @@
  */
 extern int drain_nodes ( char *nodes, char *reason, uint32_t reason_uid );
 
+/* Call job_state_set() instead */
+extern void slurm_job_state_set(job_record_t *job_ptr, uint32_t state,
+				const char *caller);
+
 /*
  * Set job state
  * IN job_ptr - Job to update
  * IN state - state from enum job_states
  */
-extern void job_state_set(job_record_t *job_ptr, uint32_t state);
+#define job_state_set(job_ptr, state) \
+	slurm_job_state_set((job_ptr), (state), __func__)
+
+/* Call job_state_set_flag() instead */
+extern void slurm_job_state_set_flag(job_record_t *job_ptr, uint32_t flag,
+				     const char *caller);
 
 /*
  * Set job state flag
  * IN job_ptr - Job to update
  * IN flag - flag to set (from JOB_* macro)
  */
-extern void job_state_set_flag(job_record_t *job_ptr, uint32_t flag);
+#define job_state_set_flag(job_ptr, flag) \
+	slurm_job_state_set_flag((job_ptr), (flag), __func__)
+
+/* Call job_state_set_flag() instead */
+extern void slurm_job_state_unset_flag(job_record_t *job_ptr, uint32_t flag,
+				       const char *caller);
 
 /*
  * Unset job state flag
  * IN job_ptr - Job to update
  * IN flag - flag to unset (from JOB_* macro)
  */
-extern void job_state_unset_flag(job_record_t *job_ptr, uint32_t flag);
+#define job_state_unset_flag(job_ptr, flag) \
+	slurm_job_state_unset_flag((job_ptr), (flag), __func__)
 
 /* dump_all_job_state - save the state of all jobs to file
  * RET 0 or error code */
 extern int dump_all_job_state ( void );
 
+/* Call on_job_state_change() instead */
+extern void slurm_on_job_state_change(job_record_t *job_ptr, uint32_t new_state,
+				      const char *caller);
+
 /*
  * Notify/update job state hash table that job state has changed
  * IN job_ptr - Job about to be updated
  * IN new_state - New value that will be assigned to job_ptr->job_state.
  *                If NO_VAL, then delete the cache entry.
  */
-extern void on_job_state_change(job_record_t *job_ptr, uint32_t new_state);
+#define on_job_state_change(job_ptr, new_state) \
+	slurm_on_job_state_change((job_ptr), (new_state), __func__)
 
 /* dump_all_node_state - save the state of all nodes to file */
 extern int dump_all_node_state ( void );
@@ -1581,12 +1601,6 @@
  */
 extern void rehash_jobs(void);
 
-/*
- * Setup and prepare job state cache (if configured)
- * IN new_hash_table_size - number of entries in hash table
- */
-extern void setup_job_state_hash(int new_hash_table_size);
-
 /* update first assigned job id as needed on reconfigure */
 extern void reset_first_job_id(void);
 
diff --git a/src/slurmctld/state_save.c b/src/slurmctld/state_save.c
index d4ae016..eb73c78 100644
--- a/src/slurmctld/state_save.c
+++ b/src/slurmctld/state_save.c
@@ -45,7 +45,13 @@
 
 #include <pthread.h>
 
+#include "src/common/log.h"
 #include "src/common/macros.h"
+#include "src/common/probes.h"
+#include "src/common/slurm_protocol_defs.h"
+#include "src/common/slurm_time.h"
+#include "src/common/timers.h"
+
 #include "src/slurmctld/reservation.h"
 #include "src/slurmctld/slurmctld.h"
 #include "src/slurmctld/trigger_mgr.h"
@@ -55,11 +61,24 @@
 #define SAVE_MAX_WAIT	5
 #endif
 
+#define SAVE_COUNT_DELAY \
+	((timespec_t) { \
+		.tv_sec = 1, \
+	})
+#define STATESAVE_WARN_TS \
+	((timespec_t) { \
+		.tv_nsec = (NSEC_IN_SEC / 2), \
+	})
+#define CTIME_STR_LEN 72
+
 static pthread_mutex_t state_save_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t  state_save_cond = PTHREAD_COND_INITIALIZER;
 static int save_jobs = 0, save_nodes = 0, save_parts = 0;
 static int save_triggers = 0, save_resv = 0;
 static bool run_save_thread = true;
+static latency_histogram_t save_histogram = LATENCY_HISTOGRAM_INITIALIZER;
+static timespec_t last_save = { 0, 0 };
+static timespec_t save_start = { 0, 0 };
 
 /* Queue saving of job state information */
 extern void schedule_job_save(void)
@@ -115,6 +134,71 @@
 	slurm_mutex_unlock(&state_save_lock);
 }
 
+static void _check_slow_save(void)
+{
+	timespec_diff_ns_t tdiff = { { 0 } };
+	bool warn = false;
+	char delay_str[CTIME_STR_LEN] = { 0 };
+
+	tdiff = timespec_diff_ns(last_save, save_start);
+	xassert(tdiff.after);
+
+	latency_metric_add_histogram_value(&save_histogram, tdiff.diff);
+
+	if (!(warn = timespec_is_after(tdiff.diff, STATESAVE_WARN_TS)))
+		return;
+
+	(void) timespec_ctime(tdiff.diff, false, delay_str, sizeof(delay_str));
+	warning("Saving to StateSaveLocation took %s. Please check backing filesystem as all of Slurm operations are delayed due to slow StateSaveLocation writes.",
+		delay_str);
+}
+
+static void _probe_verbose(probe_log_t *log)
+{
+	char histogram[LATENCY_METRIC_HISTOGRAM_STR_LEN] = { 0 };
+	char ts_str[CTIME_STR_LEN] = { 0 };
+
+	if (timespec_is_after(save_start, last_save)) {
+		(void) timespec_ctime(save_start, true, ts_str, sizeof(ts_str));
+		probe_log(log, "StateSave Status: SAVING");
+		probe_log(log, "StateSave Started: %s", ts_str);
+	} else {
+		(void) timespec_ctime(timespec_rem(last_save, save_start),
+				      false, ts_str, sizeof(ts_str));
+		probe_log(log, "StateSave Status: SLEEPING");
+		probe_log(log, "StateSave Last Duration: %s", ts_str);
+	}
+
+	(void) timespec_ctime(last_save, true, ts_str, sizeof(ts_str));
+	probe_log(log, "StateSave Last Save: %s", ts_str);
+
+	(void) latency_histogram_print_labels(histogram, sizeof(histogram));
+	probe_log(log, "StateSave Histogram: %s", histogram);
+
+	(void) latency_histogram_print(&save_histogram, histogram,
+				       sizeof(histogram));
+	probe_log(log, "StateSave Histogram: %s", histogram);
+}
+
+static probe_status_t _probe(probe_log_t *log)
+{
+	probe_status_t status = PROBE_RC_UNKNOWN;
+
+	slurm_mutex_lock(&state_save_lock);
+
+	if (log)
+		_probe_verbose(log);
+
+	if (!last_save.tv_sec)
+		status = PROBE_RC_ONLINE;
+	else
+		status = PROBE_RC_READY;
+
+	slurm_mutex_unlock(&state_save_lock);
+
+	return status;
+}
+
 /*
  * Run as pthread to keep saving slurmctld state information as needed,
  * Use schedule_job_save(),  schedule_node_save(), and schedule_part_save()
@@ -124,8 +208,6 @@
  */
 extern void *slurmctld_state_save(void *no_data)
 {
-	time_t last_save = 0, now;
-	double save_delay;
 	bool run_save;
 	int save_count;
 
@@ -135,34 +217,47 @@
 	}
 #endif
 
+	probe_register(__func__, _probe);
+
 	while (1) {
 		/* wait for work to perform */
 		slurm_mutex_lock(&state_save_lock);
 		while (1) {
+			timespec_diff_ns_t save_delay = { { 0 } };
+
+			if (last_save.tv_sec) {
+				save_delay = timespec_diff_ns(timespec_now(),
+							      save_start);
+				xassert(save_delay.after);
+			}
+
 			save_count = save_jobs + save_nodes + save_parts +
 				     save_resv + save_triggers;
-			now = time(NULL);
-			save_delay = difftime(now, last_save);
+
 			if (save_count &&
-			    (!run_save_thread ||
-			     (save_delay >= SAVE_MAX_WAIT))) {
-				last_save = now;
+			    (!run_save_thread || !last_save.tv_sec ||
+			     (timespec_to_secs(save_delay.diff) >=
+			      SAVE_MAX_WAIT))) {
 				break;		/* do the work */
 			} else if (!run_save_thread) {
 				run_save_thread = true;
 				slurm_mutex_unlock(&state_save_lock);
 				return NULL;	/* shutdown */
 			} else if (save_count) { /* wait for a timeout */
-				struct timespec ts = {0, 0};
-				ts.tv_sec = now + 1;
+				timespec_t delay =
+					timespec_add(timespec_now(),
+						     SAVE_COUNT_DELAY);
+
 				slurm_cond_timedwait(&state_save_cond,
-					  	     &state_save_lock, &ts);
-			} else {		/* wait for more work */
+						     &state_save_lock, &delay);
+			} else { /* wait for more work */
 				slurm_cond_wait(&state_save_cond,
 					  	&state_save_lock);
 			}
 		}
 
+		save_start = timespec_now();
+
 		/* save job info if necessary */
 		run_save = false;
 		/* slurm_mutex_lock(&state_save_lock); done above */
@@ -217,5 +312,10 @@
 		slurm_mutex_unlock(&state_save_lock);
 		if (run_save)
 			(void)trigger_state_save();
+
+		slurm_mutex_lock(&state_save_lock);
+		last_save = timespec_now();
+		_check_slow_save();
+		slurm_mutex_unlock(&state_save_lock);
 	}
 }
diff --git a/src/slurmd/slurmd/job_mem_limit.c b/src/slurmd/slurmd/job_mem_limit.c
index 0d13fd0..55dee9f 100644
--- a/src/slurmd/slurmd/job_mem_limit.c
+++ b/src/slurmd/slurmd/job_mem_limit.c
@@ -129,34 +129,34 @@
 {
 	step_loc_t *stepd = x;
 	int fd;
-	slurmstepd_mem_info_t stepd_mem_info;
+	uint64_t job_mem_limit = 0;
 
 	fd = stepd_connect(stepd->directory, stepd->nodename, &stepd->step_id,
 			   &stepd->protocol_version);
 	if (fd == -1)
 		return 1; /* step completed */
 
-	if (stepd_get_mem_limits(fd, stepd->protocol_version,
-				 &stepd_mem_info) != SLURM_SUCCESS) {
+	if (stepd_get_mem_limit(fd, stepd->protocol_version, &job_mem_limit) !=
+	    SLURM_SUCCESS) {
 		error("Error reading %ps memory limits from slurmstepd",
 		      &stepd->step_id);
 		close(fd);
 		return 1;
 	}
 
-	if (stepd_mem_info.job_mem_limit) {
+	if (job_mem_limit) {
 		job_mem_limits_t *limits =
 			list_find_first(job_limits_list, _match_job,
 					&stepd->step_id.job_id);
 
 		if (limits) {
-			if (stepd_mem_info.job_mem_limit > limits->job_mem)
-				limits->job_mem = stepd_mem_info.job_mem_limit;
+			if (job_mem_limit > limits->job_mem)
+				limits->job_mem = job_mem_limit;
 		} else {
 			/* create entry for this step */
 			limits = xmalloc(sizeof(*limits));
 			limits->job_id = stepd->step_id.job_id;
-			limits->job_mem = stepd_mem_info.job_mem_limit;
+			limits->job_mem = job_mem_limit;
 			debug2("%s: RecLim JobId=%u job_mem:%"PRIu64,
 			       __func__, stepd->step_id.job_id, limits->job_mem);
 			list_append(job_limits_list, limits);
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index b847bd5..e604772 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -822,7 +822,6 @@
 static int _handle_mem_limits(int fd, uid_t uid, pid_t remote_pid)
 {
 	safe_write(fd, &step->job_mem, sizeof(uint64_t));
-	safe_write(fd, &step->step_mem, sizeof(uint64_t));
 
 	return SLURM_SUCCESS;
 rwfail:
@@ -2357,13 +2356,20 @@
 	slurm_mutex_unlock(&extern_thread_lock);
 
 	for (int i = 0; i < thread_cnt; i++) {
+		pthread_t thread = 0;
 		debug2("Joining extern pid thread %d", i);
-		slurm_thread_join(extern_threads[i]);
+
+		slurm_mutex_lock(&extern_thread_lock);
+		SWAP(thread, extern_threads[i]);
+		extern_thread_cnt--;
+		slurm_mutex_unlock(&extern_thread_lock);
+
+		slurm_thread_join(thread);
 	}
 
 	slurm_mutex_lock(&extern_thread_lock);
 	xfree(extern_threads);
-	extern_thread_cnt = 0;
+	xassert(!extern_thread_cnt);
 	slurm_mutex_unlock(&extern_thread_lock);
 
 	debug2("Done joining extern pid threads");
diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c
index f478023..1b75f5d 100644
--- a/src/slurmd/slurmstepd/slurmstepd.c
+++ b/src/slurmd/slurmstepd/slurmstepd.c
@@ -847,6 +847,45 @@
 	return 0;
 }
 
+/* do nothing */
+static void _ns_on_sigchld(int signo) {}
+
+/*
+ * Reap all adopted processes forever
+ *
+ * If creating new PID namespaces with namespaces/linux, processes that
+ * terminate inside the new PID namespace may become children of this
+ * slurmstepd. Such processes that would normally be reaped by the original init
+ * process now need to be reaped by this slurmstepd.
+ */
+static void _reap_adopted_processes(void)
+{
+	struct sigaction sa = {
+		.sa_handler = _ns_on_sigchld,
+	};
+	sigset_t mask;
+
+	/* Unblock SIGCHLD if sigmask was inherited with it blocked */
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGCHLD);
+	pthread_sigmask(SIG_UNBLOCK, &mask, NULL);
+
+	/*
+	 * Override default ignore behavior for SIGCHLD by adding empty handler
+	 * function.
+	 */
+	sigemptyset(&sa.sa_mask);
+	sigaction(SIGCHLD, &sa, NULL);
+
+	while (true) {
+		/* Block until SIGCHLD (or any signal) is received */
+		pause();
+		/* Cleanup any terminated processes (if any) */
+		while (waitpid(-1, NULL, WNOHANG) > 0)
+			;
+	}
+}
+
 /*
  *  Process special "modes" of slurmstepd passed as cmdline arguments.
  */
@@ -870,7 +909,9 @@
 		setproctitle("%s", buf);
 		xfree(buf);
 		set_oom_adj(STEPD_OOM_ADJ);
-		(void) poll(NULL, 0, -1);
+
+		_reap_adopted_processes();
+
 		fini_setproctitle();
 		exit(0);
 	}
@@ -927,7 +968,7 @@
 	safe_read(sock, &ok, sizeof(int));
 	return;
 rwfail:
-	error("Unable to receive \"ok ack\" to slurmd");
+	error("Unable to receive \"ok ack\" from slurmd");
 #endif
 }
 
diff --git a/testsuite/README b/testsuite/README
index c6ff847..dfbc7df 100644
--- a/testsuite/README
+++ b/testsuite/README
@@ -1088,6 +1088,10 @@
 test_152_1   Test IMEX channel allocation/deallocation
 test_152_2   Test --network=unique-channel-per-segment
 
+test_153_#   Testing Metrics/Type.
+========================================
+test_153_1   Test basic metrics/openmetrics
+
 test_154_#   Testing HRes.
 ========================================
 test_154_1   Test HRes MODE_3
diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91
index f78d94b..baa28b4 100755
--- a/testsuite/expect/test1.91
+++ b/testsuite/expect/test1.91
@@ -217,14 +217,16 @@
 # Run a job step to get allocated processor count
 #
 set task_cnt 0
-send "$srun -c1 ./$file_prog\r"
+send "$srun -c1 ./$file_prog; echo EXIT_CODE:$?\r"
 expect {
 	-re "TASK_ID:($number),MASK:($number)" {
 		incr task_cnt
 		exp_continue
 	}
-	-re "error" {
-		fail "Some error occurred"
+	-re "EXIT_CODE:($number)" {
+		if {$expect_out(1,string) != 0} {
+			fail "Some error occurred"
+		}
 	}
 	timeout {
 		fail "salloc not responding or failure to recognize prompt"
@@ -241,14 +243,16 @@
 #
 set expected_mask [ expr ((1 << $task_cnt) - 1) ]
 set task_mask 0
-send "$srun -c1 -n $task_cnt --cpu-bind=threads ./$file_prog\r"
+send "$srun -c1 -n $task_cnt --cpu-bind=threads ./$file_prog; echo EXIT_CODE:$?\r"
 expect {
 	-re "TASK_ID:($number),MASK:($number)" {
 		incr task_mask $expect_out(2,string)
 		exp_continue
 	}
-	-re "error" {
-		fail "Some error occurred"
+	-re "EXIT_CODE:($number)" {
+		if {$expect_out(1,string) != 0} {
+			fail "Some error occurred"
+		}
 	}
 	timeout {
 		fail "srun (from --allocate) not responding or failure to recognize prompt"
@@ -276,14 +280,16 @@
 log_debug "core_mask: $expected_mask"
 
 set task_mask 0
-send "$srun -c1 -n $core_task_cnt -B 1:1:1 ./$file_prog\r"
+send "$srun -c1 -n $core_task_cnt -B 1:1:1 ./$file_prog; echo EXIT_CODE:$?\r"
 expect {
 	-re "TASK_ID:($number),MASK:($number)" {
 		incr task_mask $expect_out(2,string)
 		exp_continue
 	}
-	-re "error" {
-		fail "Some error occurred"
+	-re "EXIT_CODE:($number)" {
+		if {$expect_out(1,string) != 0} {
+			fail "Some error occurred"
+		}
 	}
 	timeout {
 		fail "srun (from --allocate) not responding or failure to recognize prompt"
@@ -306,7 +312,7 @@
 	set num_tasks 0
 	set num_bits  0
 	set task_mask 0
-	send "$srun -B $this_cnt-$this_cnt:$num_cores:$num_threads ./$file_prog\r"
+	send "$srun -B $this_cnt-$this_cnt:$num_cores:$num_threads ./$file_prog; echo EXIT_CODE:$?\r"
 	expect {
 		-re "TASK_ID:($number),MASK:($number)" {
 			incr task_mask $expect_out(2,string)
@@ -321,8 +327,10 @@
 			}
 			exp_continue
 		}
-		-re "error" {
-			fail "Some error occurred"
+		-re "EXIT_CODE:($number)" {
+			if {$expect_out(1,string) != 0} {
+				fail "Some error occurred"
+			}
 		}
 		timeout {
 			fail "srun (from --allocate) not responding or failure to recognize prompt"
@@ -350,7 +358,7 @@
 	set num_tasks 0
 	set num_bits  0
 	set task_mask 0
-	send "$srun -B $num_sockets:$this_cnt-$this_cnt:$num_threads ./$file_prog\r"
+	send "$srun -B $num_sockets:$this_cnt-$this_cnt:$num_threads ./$file_prog; echo EXIT_CODE:$?\r"
 	expect {
 		-re "TASK_ID:($number),MASK:($number)" {
 			incr task_mask $expect_out(2,string)
@@ -365,8 +373,10 @@
 			}
 			exp_continue
 		}
-		-re "error" {
-			fail "Some error occurred"
+		-re "EXIT_CODE:($number)" {
+			if {$expect_out(1,string) != 0} {
+				fail "Some error occurred"
+			}
 		}
 		timeout {
 			fail "salloc not responding or failure to recognize prompt"
@@ -394,7 +404,7 @@
 	set num_tasks 0
 	set num_bits  0
 	set task_mask 0
-	send "$srun -B $num_sockets:$num_cores:$this_cnt-$this_cnt ./$file_prog\r"
+	send "$srun -B $num_sockets:$num_cores:$this_cnt-$this_cnt ./$file_prog; echo EXIT_CODE:$?\r"
 	expect {
 		-re "TASK_ID:($number),MASK:($number)" {
 			incr task_mask $expect_out(2,string)
@@ -409,8 +419,10 @@
 			}
 			exp_continue
 		}
-		-re "error" {
-			fail "Some error occurred"
+		-re "EXIT_CODE:($number)" {
+			if {$expect_out(1,string) != 0} {
+				fail "Some error occurred"
+			}
 		}
 		timeout {
 			fail "salloc not responding or failure to recognize prompt"
@@ -437,7 +449,7 @@
 	set num_tasks 0
 	set num_bits  0
 	set task_mask 0
-	send "$srun -c$this_cnt -B 1:1:1 ./$file_prog\r"
+	send "$srun -c$this_cnt -B 1:1:1 ./$file_prog; echo EXIT_CODE:$?\r"
 	expect {
 		-re "TASK_ID:($number),MASK:($number)" {
 			incr task_mask $expect_out(2,string)
@@ -452,8 +464,10 @@
 			}
 			exp_continue
 		}
-		-re "error" {
-			fail "Some error occurred"
+		-re "EXIT_CODE:($number)" {
+			if {$expect_out(1,string) != 0} {
+				fail "Some error occurred"
+			}
 		}
 		timeout {
 			fail "salloc not responding or failure to recognize prompt"
@@ -477,14 +491,16 @@
 #
 set expected_mask [ expr ((1 << $task_cnt) - 1) ]
 set task_mask 0
-send "$srun -n $task_cnt -m plane=4 ./$file_prog\r"
+send "$srun -n $task_cnt -m plane=4 ./$file_prog; echo EXIT_CODE:$?\r"
 expect {
 	-re "TASK_ID:($number),MASK:($number)" {
 		incr task_mask $expect_out(2,string)
 		exp_continue
 	}
-	-re "error" {
-		fail "Some error occurred"
+	-re "EXIT_CODE:($number)" {
+		if {$expect_out(1,string) != 0} {
+			fail "Some error occurred"
+		}
 	}
 	timeout {
 		fail "salloc not responding or failure to recognize prompt"
@@ -504,9 +520,6 @@
 #
 send "exit\r"
 expect {
-	-re "error" {
-		fail "Some error occurred"
-	}
 	timeout {
 		fail "salloc not responding or failure to recognize prompt"
 	}
diff --git a/testsuite/expect/test1.92 b/testsuite/expect/test1.92
index adfbfa6..c710698 100755
--- a/testsuite/expect/test1.92
+++ b/testsuite/expect/test1.92
@@ -61,7 +61,7 @@
 set task_cnt 0
 set prev_node -1
 set node_cnt 0
-spawn $bin_bash -c "$srun $srun_args -l -c1 $file_bash | $bin_sort -V"
+spawn $bin_bash -c "$srun $srun_args -l -c1 $file_bash | $bin_sort -V; echo EXIT_CODE:$?"
 expect {
 	-re "nodeid:($number) taskid:($number)" {
 		set this_node $expect_out(1,string)
@@ -73,8 +73,10 @@
 		}
 		exp_continue
 	}
-	-re "error" {
-		fail "Some error occurred"
+	-re "EXIT_CODE:($number)" {
+		if {$expect_out(1,string) != 0} {
+			fail "Some error occurred"
+		}
 	}
 	timeout {
 		fail "salloc not responding or failure to recognize prompt"
@@ -96,7 +98,7 @@
 set this_cnt  0
 set prev_node -1
 set this_node -1
-spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m block $file_bash | $bin_sort -V"
+spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m block $file_bash | $bin_sort -V; echo EXIT_CODE:$?"
 expect {
 	-re "nodeid:($number) taskid:($number) localid:($number)" {
 		set this_node $expect_out(1,string)
@@ -112,8 +114,10 @@
 		}
 		exp_continue
 	}
-	-re "error" {
-		fail "Some error occurred"
+	-re "EXIT_CODE:($number)" {
+		if {$expect_out(1,string) != 0} {
+			fail "Some error occurred"
+		}
 	}
 	timeout {
 		fail "srun not responding or failure to recognize prompt"
@@ -131,7 +135,7 @@
 set prev_node -1
 set this_node -1
 set prev_cnt  $block_size
-spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m cyclic $file_bash | $bin_sort -V"
+spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m cyclic $file_bash | $bin_sort -V; echo EXIT_CODE:$?"
 expect {
 	-re "nodeid:($number) taskid:($number) localid:($number)" {
 		set this_node $expect_out(1,string)
@@ -147,8 +151,10 @@
 		}
 		exp_continue
 	}
-	-re "error" {
-		fail "Some error occurred"
+	-re "EXIT_CODE:($number)" {
+		if {$expect_out(1,string) != 0} {
+			fail "Some error occurred"
+		}
 	}
 	timeout {
 		fail "srun not responding or failure to recognize prompt"
@@ -166,7 +172,7 @@
 set prev_node -1
 set this_node -1
 set prev_cnt  $block_size
-spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m plane=$block_size $file_bash | $bin_sort -V"
+spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m plane=$block_size $file_bash | $bin_sort -V; echo EXIT_CODE:$?"
 expect {
 	-re "nodeid:($number) taskid:($number) localid:($number)" {
 		set this_node $expect_out(1,string)
@@ -182,8 +188,10 @@
 		}
 		exp_continue
 	}
-	-re "error" {
-		fail "Some error occurred"
+	-re "EXIT_CODE:($number)" {
+		if {$expect_out(1,string) != 0} {
+			fail "Some error occurred"
+		}
 	}
 	timeout {
 		fail "srun not responding or failure to recognize prompt"
diff --git a/testsuite/expect/test15.27 b/testsuite/expect/test15.27
index cbdf2ec..57de92c 100755
--- a/testsuite/expect/test15.27
+++ b/testsuite/expect/test15.27
@@ -72,6 +72,11 @@
 	skip "Test invalid without slurmdbd"
 }
 
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+    [package vcompare [get_version] "25.11.0"] < 0 } {
+	skip "In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
 set node_name [get_nodes_by_request "--gres=gpu:2 -n1 -t1"]
 if { [llength $node_name] != 1 } {
 	skip "This test need to be able to submit jobs with at least --gres=gpu:2"
diff --git a/testsuite/expect/test15.37 b/testsuite/expect/test15.37
index 55b7935..2a63f01 100755
--- a/testsuite/expect/test15.37
+++ b/testsuite/expect/test15.37
@@ -30,6 +30,11 @@
 
 set job_list [list]
 
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+    [package vcompare [get_version] "25.11.0"] < 0 } {
+	skip "In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
 #
 # Get the node to use
 #
diff --git a/testsuite/expect/test17.12 b/testsuite/expect/test17.12
index 4839db3..d0c84f5 100755
--- a/testsuite/expect/test17.12
+++ b/testsuite/expect/test17.12
@@ -51,6 +51,11 @@
 #
 # Prerequisites
 #
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+    [package vcompare [get_version] "25.11.0"] < 0 } {
+	skip "Ticket 23611: In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
 if {![is_super_user] || [get_config_param "NodeFeaturesPlugins"] ne "(null)"} {
 	skip "Test needs super user and not NodeFeaturesPlugins"
 }
diff --git a/testsuite/expect/test17.17 b/testsuite/expect/test17.17
index 08784a5..0b24464 100755
--- a/testsuite/expect/test17.17
+++ b/testsuite/expect/test17.17
@@ -44,6 +44,11 @@
 	cancel_job [list $job_id1 $job_id2]
 }
 
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+    [package vcompare [get_version] "25.11.0"] < 0 } {
+	skip "In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
 set node_name [get_nodes_by_request "--gres=gpu:2 -n1 -t1"]
 if { [llength $node_name] != 1 } {
 	skip "This test need to be able to submit jobs with at least --gres=gpu:2"
diff --git a/testsuite/expect/test17.60 b/testsuite/expect/test17.60
index 1448c36..d2a4f9b 100755
--- a/testsuite/expect/test17.60
+++ b/testsuite/expect/test17.60
@@ -30,6 +30,11 @@
 
 set job_list [list]
 
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+    [package vcompare [get_version] "25.11.0"] < 0 } {
+	skip "In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
 #
 # Get the node to use
 #
diff --git a/testsuite/expect/test39.5 b/testsuite/expect/test39.5
index 3da06ef..d83c2c3 100755
--- a/testsuite/expect/test39.5
+++ b/testsuite/expect/test39.5
@@ -81,6 +81,11 @@
 	skip "This test is only compatible with select/cons_tres"
 }
 
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+    [package vcompare [get_version] "25.11.0"] < 0 } {
+	skip "In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
 set nb_nodes [get_partition_param [default_partition] "TotalNodes"]
 if {$nb_nodes > 1} {
 	set nb_nodes 2
diff --git a/testsuite/expect/test9.2 b/testsuite/expect/test9.2
index b0bfcad..ae6d1fc 100755
--- a/testsuite/expect/test9.2
+++ b/testsuite/expect/test9.2
@@ -33,55 +33,28 @@
 
 set file_out     "$test_dir/output"
 set job_name     $test_name
-
-set cycle_count [get_cycle_count]
-set task_cnt    $max_stress_tasks
-set node_cnt	1-4
+set cycle_count  2
+set task_cnt     [get_total_cpus]
+set node_cnt     [llength [get_nodes_by_state]]
 set other_opts   "-O"
 
-# Execute an srun job to print hostname to output_file with task_cnt tasks per node,
-# wait for completion
-# Returns 0 on successful completion, returns 1 otherwise
-proc run_hostname_job { output_file } {
-	global bin_printenv bin_rm job_name number srun node_cnt other_opts
-	global task_cnt timeout
-	exec $bin_rm -f $output_file
-
-	spawn $srun --job-name=$job_name --input=/dev/null --output=$output_file --error=/dev/null -n$task_cnt -N$node_cnt $other_opts -t1 $bin_printenv SLURMD_NODENAME
-	expect {
-		-re "Unable to contact" {
-			fail "Slurm appears to be down"
-		}
-		timeout {
-			fail "srun not responding"
-		}
-		eof {
-			wait
-		}
-	}
-
-	wait_for_file -fail $output_file
-}
+log_info "Using $task_cnt tasks"
 
 #
 # Run cycle_count jobs to run "hostname" and check output file size
 #
-set success_cnt 0
-set timeout $max_job_delay
 for {set inx 0} {$inx < $cycle_count} {incr inx} {
-	run_hostname_job $file_out
-	set stdout_lines [get_line_cnt $file_out]
-	if {$stdout_lines != $task_cnt} {
-		exec $bin_sleep 1
+	# Remove files from previous cycle
+	exec $bin_rm -f $file_out
+
+	# Submit a job that copies -i to -o
+	run_command -fail "$srun --job-name=$job_name --input=/dev/null --output=$file_out --error=/dev/null -n$task_cnt -N$node_cnt $other_opts -t1 $bin_printenv SLURMD_NODENAME"
+
+	# Test output file size
+	set stdout_lines 0
+	wait_for_file $file_out
+	wait_for {$stdout_lines == $task_cnt} {
 		set stdout_lines [get_line_cnt $file_out]
 	}
-	if {$stdout_lines != $task_cnt} {
-		if {$stdout_lines == 0} {
-			fail "stdout is empty. Is current working directory writable from compute nodes?"
-		} else {
-			fail "stdout is incomplete"
-		}
-	} else {
-		incr success_cnt
-	}
+	subtest {$stdout_lines == $task_cnt} "Output file should have one line per task" "$stdout_lines != $task_cnt"
 }
diff --git a/testsuite/python/check/common/test_timespec_t.c b/testsuite/python/check/common/test_timespec_t.c
index 6a76c77..dd17ae4 100644
--- a/testsuite/python/check/common/test_timespec_t.c
+++ b/testsuite/python/check/common/test_timespec_t.c
@@ -67,16 +67,16 @@
 	ck_assert(y.tv_nsec == 5);
 	ck_assert(y.tv_sec == 5);
 
-	x = (timespec_t) { 10, (10 * NSEC_IN_SEC) };
-	y = (timespec_t) { 5, (5 * NSEC_IN_SEC) };
+	x = (timespec_t) { 10, ((2 * NSEC_IN_SEC) + 150) };
+	y = (timespec_t) { 5, ((1 * NSEC_IN_SEC) + 88) };
 
 	x = timespec_normalize(x);
-	ck_assert(x.tv_nsec == 20);
-	ck_assert(x.tv_sec == 0);
+	ck_assert(x.tv_sec == 12);
+	ck_assert(x.tv_nsec == 150);
 
 	y = timespec_normalize(y);
-	ck_assert(y.tv_nsec == 10);
-	ck_assert(y.tv_sec == 0);
+	ck_assert(y.tv_sec == 6);
+	ck_assert(y.tv_nsec == 88);
 }
 
 END_TEST
@@ -115,6 +115,10 @@
 
 START_TEST(test_rem)
 {
+#if SLURM_VERSION_NUMBER < SLURM_VERSION_NUM(26, 5, 0)
+	printf("XFAIL: Issue #50096: timespec_t - Math operations fail cross negatives\n");
+#endif
+
 	timespec_t x = { 10, 4 };
 	timespec_t y = { 5, 2 };
 	timespec_t t1 = { 0 }, t2 = { 0 }, t3 = { 0 };
@@ -123,18 +127,13 @@
 	ck_assert(t1.tv_sec == 5);
 	ck_assert(t1.tv_nsec == 2);
 
-	/* Negative math is rejected currently */
 	t2 = timespec_rem(y, x);
-	//ck_assert(t2.tv_sec == -5);
-	//ck_assert(t2.tv_nsec == -2);
-	ck_assert(t2.tv_sec == 0);
-	ck_assert(t2.tv_nsec == 0);
+	ck_assert(t2.tv_sec == -5);
+	ck_assert(t2.tv_nsec == -2);
 
-	t3 = timespec_rem(t1, t2);
-	//ck_assert(t3.tv_sec == 0);
-	//ck_assert(t3.tv_nsec == 0);
-	ck_assert(t3.tv_sec == 5);
-	ck_assert(t3.tv_nsec == 2);
+	t3 = timespec_add(t1, t2);
+	ck_assert(t3.tv_sec == 0);
+	ck_assert(t3.tv_nsec == 0);
 }
 
 END_TEST
diff --git a/testsuite/python/conftest.py b/testsuite/python/conftest.py
index 9746d61..8d4aaf7 100644
--- a/testsuite/python/conftest.py
+++ b/testsuite/python/conftest.py
@@ -234,6 +234,34 @@
                 quiet=True,
             )
 
+        # Backup current SysConfigDir
+        if os.path.exists(atf.properties["slurm-config-dir"]):
+            if os.path.exists(atf.properties["slurm-config-dir"] + name):
+                logging.warning(
+                    f"Backup for SysConfigDir already exists ({atf.properties['slurm-config-dir']+name}). Removing it."
+                )
+                atf.run_command(
+                    f"rm -rf {atf.properties['slurm-config-dir']+name}",
+                    user="root",
+                    quiet=True,
+                )
+            atf.run_command(
+                f"rsync -a --delete {atf.properties['slurm-config-dir']}/ {atf.properties['slurm-config-dir']+name}/",
+                user="root",
+                quiet=True,
+            )
+        # Setup a fresh SysConfDir based on slurm-config-orig-dir
+        if os.path.exists(atf.properties["slurm-config-orig-dir"]):
+            atf.run_command(
+                f"sudo rsync -a --delete {atf.properties['slurm-config-orig-dir']}/ {atf.properties['slurm-config-dir']}/",
+                quiet=True,
+                fatal=True,
+            )
+        else:
+            logging.warning(
+                f"Base SysConfDir ({atf.properties['slurm-config-orig-dir']}) doesn't exist. Using current SysConfDir as the base one."
+            )
+
         # Create the required node directories for node0
         node_name = "node0"
         spool_dir = atf.properties["slurm-spool-dir"].replace("%n", node_name)
@@ -281,6 +309,23 @@
                 quiet=True,
             )
 
+        # Restore SysConfigDir
+        atf.run_command(
+            f"rm -rf {atf.properties['slurm-config-dir']}", user="root", quiet=True
+        )
+        if os.path.exists(
+            atf.properties["slurm-config-dir"] + atf.properties["test_name"]
+        ):
+            atf.run_command(
+                f"mv {atf.properties['slurm-config-dir']+atf.properties['test_name']} {atf.properties['slurm-config-dir']}",
+                user="root",
+                quiet=True,
+            )
+        else:
+            logging.warning(
+                "SysConfDir backup doesn't exist, but it should have been created in the module_setup fixture."
+            )
+
         # Remove Nodes directories:
         if "nodes" not in atf.properties:
             atf.properties["nodes"] = ["node0"]
@@ -317,10 +362,6 @@
                 fatal=True,
             )
 
-        # Restore any backed up configuration files
-        for config in set(atf.properties["configurations-modified"]):
-            atf.restore_config_file(config)
-
         # Clean influxdb
         if atf.properties["influxdb-started"]:
             atf.request_influxdb(f"DROP DATABASE {atf.properties['influxdb_db']}")
diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py
index 9fe5080..0452bb8 100644
--- a/testsuite/python/lib/atf.py
+++ b/testsuite/python/lib/atf.py
@@ -28,6 +28,7 @@
 
 import json
 import jsondiff
+import jsonpatch
 import yaml
 
 # This module will be (un)imported in require_openapi_generator()
@@ -1436,6 +1437,76 @@
     )
 
 
+def get_deprecated_openapi_spec_patch(openapi_spec, undeprecate=False):
+    """
+    Returns a JsonPatch object that can be applied to the openapi_spec to
+    fully deprecate it, as we should do for older specs in newer Slurm
+    versions.
+
+    If undeprecate is set to True, then it does the opposite, it returns a
+    JsonPatch to remove all deprecate marks of the openapi_specs. This
+    should NOT be used in general, but as we generated some openapi_specs
+    with newer Slurm versions, we may need to undeprecate them (see v41).
+    """
+    ops = []
+
+    def _escape_json_pointer(s: str) -> str:
+        return s.replace("~", "~0").replace("/", "~1")
+
+    for raw_path, path_obj in openapi_spec.get("paths", {}).items():
+        escaped_path = _escape_json_pointer(raw_path)
+
+        for method, operation in path_obj.items():
+            if method.lower() not in (
+                "get",
+                "post",
+                "put",
+                "delete",
+                "patch",
+                "head",
+                "options",
+            ):
+                continue
+
+            # 1. Mark the operation itself as deprecated
+            if not undeprecate:
+                ops.append(
+                    {
+                        "op": "add",
+                        "path": f"/paths/{escaped_path}/{method}/deprecated",
+                        "value": True,
+                    }
+                )
+            else:
+                ops.append(
+                    {
+                        "op": "remove",
+                        "path": f"/paths/{escaped_path}/{method}/deprecated",
+                    }
+                )
+
+            # 2. Mark every parameter as deprecated
+            params = operation.get("parameters", [])
+            for i, _ in enumerate(params):
+                if not undeprecate:
+                    ops.append(
+                        {
+                            "op": "add",
+                            "path": f"/paths/{escaped_path}/{method}/parameters/{i}/deprecated",
+                            "value": True,
+                        }
+                    )
+                else:
+                    ops.append(
+                        {
+                            "op": "remove",
+                            "path": f"/paths/{escaped_path}/{method}/parameters/{i}/deprecated",
+                        }
+                    )
+
+    return jsonpatch.JsonPatch(ops)
+
+
 def assert_openapi_spec_eq(spec_a, spec_b):
     """
     Asserts that two OpenAPI specifications are equal, ignoring some info and
@@ -1606,126 +1677,6 @@
     )
 
 
-def backup_config_file(config="slurm.conf"):
-    """Backs up a configuration file.
-
-    This function may only be used in auto-config mode.
-
-    Args:
-        config (string): Name of the config file to back up.
-
-    Returns:
-        None
-
-    Example:
-        >>> backup_config_file('slurm.conf')
-        >>> backup_config_file('gres.conf')
-        >>> backup_config_file('cgroup.conf')
-    """
-
-    if not properties["auto-config"]:
-        require_auto_config(f"wants to modify the {config} file")
-
-    properties["configurations-modified"].add(config)
-
-    config_file = f"{properties['slurm-config-dir']}/{config}"
-    backup_config_file = f"{config_file}.orig-atf"
-
-    # If a backup already exists, issue a warning and return (honor existing backup)
-    if os.path.isfile(backup_config_file):
-        logging.trace(f"Backup file already exists ({backup_config_file})")
-        return
-
-    # If the file to backup does not exist, touch an empty backup file with
-    # the sticky bit set. restore_config_file will remove the file.
-    if not os.path.isfile(config_file):
-        run_command(
-            f"touch {backup_config_file}",
-            user=properties["slurm-user"],
-            fatal=True,
-            quiet=True,
-        )
-        run_command(
-            f"chmod 1000 {backup_config_file}",
-            user=properties["slurm-user"],
-            fatal=True,
-            quiet=True,
-        )
-
-    # Otherwise, copy the config file to the backup
-    else:
-        run_command(
-            f"cp {config_file} {backup_config_file}",
-            user=properties["slurm-user"],
-            fatal=True,
-            quiet=True,
-        )
-
-
-def restore_config_file(config="slurm.conf"):
-    """Restores a configuration file.
-
-    This function may only be used in auto-config mode.
-
-    Args:
-        config (string): Name of config file to restore.
-
-    Returns:
-        None
-
-    Example:
-        >>> restore_config_file('slurm.conf')
-        >>> restore_config_file('gres.conf')
-        >>> restore_config_file('cgroup.conf')
-    """
-
-    config_file = f"{properties['slurm-config-dir']}/{config}"
-    backup_config_file = f"{config_file}.orig-atf"
-
-    properties["configurations-modified"].remove(config)
-
-    # If backup file doesn't exist, it has probably already been
-    # restored by a previous call to restore_config_file
-    if not os.path.isfile(backup_config_file):
-        logging.trace(
-            f"Backup file does not exist for {config_file}. It has probably already been restored."
-        )
-        return
-
-    # If the sticky bit is set and the file is empty, remove both the file and the backup
-    backup_stat = os.stat(backup_config_file)
-    if backup_stat.st_size == 0 and backup_stat.st_mode & stat.S_ISVTX:
-        run_command(
-            f"rm -f {backup_config_file}",
-            user=properties["slurm-user"],
-            fatal=True,
-            quiet=True,
-        )
-        if os.path.isfile(config_file):
-            run_command(
-                f"rm -f {config_file}",
-                user=properties["slurm-user"],
-                fatal=True,
-                quiet=True,
-            )
-
-    # Otherwise, copy backup config file to primary config file
-    # and remove the backup (.orig-atf)
-    else:
-        run_command(
-            f"cp {backup_config_file} {config_file}",
-            user=properties["slurm-user"],
-            fatal=True,
-            quiet=True,
-        )
-        run_command(
-            f"rm -f {backup_config_file}",
-            user=properties["slurm-user"],
-            fatal=True,
-            quiet=True,
-        )
-
-
 def get_config(live=True, source="slurm", quiet=False, delimiter="="):
     """Returns the Slurm configuration as a dictionary.
 
@@ -1943,9 +1894,6 @@
 
     config_file = f"{properties['slurm-config-dir']}/{config}.conf"
 
-    # This has the side-effect of adding config to configurations-modified
-    backup_config_file(f"{config}.conf")
-
     # Remove all matching parameters and append the new parameter
     lines = []
     output = run_command_output(
@@ -2297,7 +2245,6 @@
         )
 
     # In auto-config
-    backup_config_file(filename)
     run_command(
         f"cat > {file_path}", input=content, user=properties["slurm-user"], fatal=True
     )
@@ -3028,7 +2975,6 @@
         )
 
     # Write the config file back out with the modifications
-    backup_config_file("slurm.conf")
     new_config_string = "\n".join(new_config_lines)
     run_command(
         f"echo '{new_config_string}' > {config_file}",
@@ -4337,7 +4283,6 @@
     new_config_lines.insert(last_node_line_index + 1, node_line)
 
     # Write the config file back out with the modifications
-    backup_config_file("slurm.conf")
     new_config_string = "\n".join(new_config_lines)
     run_command(
         f"echo '{new_config_string}' > {config_file}",
@@ -5110,7 +5055,6 @@
         )
 
     # Write the config file back out with the modifications
-    backup_config_file("slurm.conf")
     new_config_string = "\n".join(new_config_lines)
     run_command(
         f"echo '{new_config_string}' > {config_file}",
@@ -5266,6 +5210,10 @@
 if "influxdb_db" in testsuite_config:
     properties["influxdb_db"] = testsuite_config["influxdb_db"]
 
+# TODO: The SlurmConfigDirBase should be passed from the testsuite.conf too,
+#       instead of the hardcoded prefix+etc.orig form here.
+properties["slurm-config-orig-dir"] = properties["slurm-prefix"] + "/etc.orig"
+
 # Set derived directory properties
 # The environment (e.g. PATH, SLURM_CONF) overrides the configuration.
 # If the Slurm clients and daemons are not in the current PATH
diff --git a/testsuite/python/tests/test_100_1.py b/testsuite/python/tests/test_100_1.py
index 3e4aefc..61767ba 100644
--- a/testsuite/python/tests/test_100_1.py
+++ b/testsuite/python/tests/test_100_1.py
@@ -55,6 +55,14 @@
 """
 
 # TODO: Remove xfail_tests.append() once their issue is fixed.
+if atf.get_version() < (26, 5):
+    xfail_tests.append(
+        (
+            "common/test_timespec_t.c",
+            "test_rem",
+            "Issue #50096. Math operations fail cross negatives",
+        )
+    )
 if atf.get_version() < (25, 11):
     skip_tests.append(
         (
@@ -82,14 +90,6 @@
             "conmgr test doesn't compile for versions < 25.05",
         )
     )
-else:
-    xfail_tests.append(
-        (
-            "common/test_timespec_t.c",
-            "test_normalize",
-            "Issue #50096. Math operations fail cross negatives",
-        )
-    )
 
 # Create a test_function() for all test file in the testsuite_check_dir.
 # All the test_function()s will be run by pytest as if actually defined here.
diff --git a/testsuite/python/tests/test_112_41.py b/testsuite/python/tests/test_112_41.py
index 08e1c59..c9a8953 100644
--- a/testsuite/python/tests/test_112_41.py
+++ b/testsuite/python/tests/test_112_41.py
@@ -10,6 +10,8 @@
 # import json
 # import logging
 
+import jsonpatch
+
 random.seed()
 
 cluster_name = f"test-cluster-{random.randrange(0, 99999999999)}"
@@ -229,9 +231,60 @@
     assert "/slurmdb/v0.0.41/jobs/" in spec["paths"].keys()
 
 
-@pytest.mark.xfail(reason="Ticket 23807: Schema changed")
 @pytest.mark.parametrize("openapi_spec", ["41"], indirect=True)
 def test_specification(openapi_spec):
+    base_path = "/components/schemas/v0.0.41_"
+
+    # NOTE: The OpenAPI spec for v41 was generated by slurmrestd 25.11 instead
+    #       of its original version back in 24.05. Therefore, changes done
+    #       from 24.05 to 25.11 should be reverted from the spec to test
+    #       in older versions.
+    if atf.get_version("sbin/slurmrestd") < (25, 11):
+        # In 25.11 we deprecated the whole v41, we need to "undeprecate" it for
+        # older versions
+        patch = atf.get_deprecated_openapi_spec_patch(openapi_spec, undeprecate=True)
+        patch.apply(openapi_spec, in_place=True)
+
+    if atf.get_version("sbin/slurmrestd") < (25, 5):
+        # Issue 50233: In 25.05 we added the delete reservation endpoint.
+        path = "/paths/~1slurm~1v0.0.41~1reservation~1{reservation_name}/delete"
+        patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+        patch.apply(openapi_spec, in_place=True)
+
+        # In 25.05 we removed frontend code
+        path = "/paths/~1slurmdb~1v0.0.41~1cluster~1{cluster_name}/delete/parameters/4/schema/enum/2"
+        patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+        patch.apply(openapi_spec, in_place=True)
+        path = "/paths/~1slurmdb~1v0.0.41~1cluster~1{cluster_name}/get/parameters/4/schema/enum/2"
+        patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+        patch.apply(openapi_spec, in_place=True)
+        path = "/components/schemas/v0.0.41_cluster_rec_flags/items/enum/2"
+        patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+        patch.apply(openapi_spec, in_place=True)
+
+        # In 25.05 we removed node_record_t->alloc_tres_weighted (!881), and
+        # deprecated it in the REST API
+        path = "/components/schemas/v0.0.41_node/properties/tres_weighted/deprecated"
+        patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+        patch.apply(openapi_spec, in_place=True)
+
+    if atf.get_version("sbin/slurmrestd") >= (25, 5):
+        # Ticket 2450: We finally decided to revert deprecating job_submit_req script field
+        path = base_path + "job_submit_req/properties/script/deprecated"
+        patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+        patch.apply(openapi_spec, in_place=True)
+
+    if atf.get_version("sbin/slurmrestd") >= (25, 11):
+        # Ticket 23874: In 25.11 we deprecated parts_packed
+        path = base_path + "stats_msg/properties/parts_packed/deprecated"
+        patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": True}])
+        patch.apply(openapi_spec, in_place=True)
+
+        # Ticket 23874: In 25.11 we converted schedule_cycle_sum to int64
+        path = base_path + "stats_msg/properties/schedule_cycle_sum/format"
+        patch = jsonpatch.JsonPatch([{"op": "replace", "path": path, "value": "int64"}])
+        patch.apply(openapi_spec, in_place=True)
+
     atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
 
 
diff --git a/testsuite/python/tests/test_112_42.py b/testsuite/python/tests/test_112_42.py
index 3abc519..a17a996 100644
--- a/testsuite/python/tests/test_112_42.py
+++ b/testsuite/python/tests/test_112_42.py
@@ -5,6 +5,7 @@
 import pytest
 import getpass
 import json
+import jsonpatch
 import random
 import logging
 import os
@@ -229,13 +230,59 @@
     assert "/slurmdb/v0.0.42/jobs/" in spec["paths"].keys()
 
 
-@pytest.mark.xfail(
-    atf.get_version("sbin/slurmrestd") >= (25, 11)
-    or atf.get_version("sbin/slurmrestd") <= (24, 11, 6),
-    reason="Ticket 23807: Schema changed",
-)
 @pytest.mark.parametrize("openapi_spec", ["42"], indirect=True)
 def test_specification(openapi_spec):
+    base_path = "/components/schemas/v0.0.42_"
+
+    # NOTE: The OpenAPI spec for v42 was generated by slurmrestd 25.11 instead
+    #       of its original version back in 24.11. Therefore, changes done
+    #       from 24.11 to 25.11 should be reverted from the spec to test
+    #       in older versions.
+    if atf.get_version("sbin/slurmrestd") < (25, 5):
+        # Issue 50233: In 25.05 we added the delete reservation endpoint.
+        path = "/paths/~1slurm~1v0.0.42~1reservation~1{reservation_name}/delete"
+        patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+        patch.apply(openapi_spec, in_place=True)
+
+        # In 25.05 we removed frontend code
+        path = "/paths/~1slurmdb~1v0.0.42~1cluster~1{cluster_name}/delete/parameters/4/schema/enum/2"
+        patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+        patch.apply(openapi_spec, in_place=True)
+        path = "/paths/~1slurmdb~1v0.0.42~1cluster~1{cluster_name}/get/parameters/4/schema/enum/2"
+        patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+        patch.apply(openapi_spec, in_place=True)
+        path = "/components/schemas/v0.0.42_cluster_rec_flags/items/enum/2"
+        patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+        patch.apply(openapi_spec, in_place=True)
+
+        # In 25.05 we removed node_record_t->alloc_tres_weighted (!881), and
+        # deprecated it in the REST API
+        path = "/components/schemas/v0.0.42_node/properties/tres_weighted/deprecated"
+        patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+        patch.apply(openapi_spec, in_place=True)
+
+    if atf.get_version("sbin/slurmrestd") >= (25, 5):
+        # Ticket 2450: We finally decided to revert deprecating job_submit_req script field
+        path = base_path + "job_submit_req/properties/script/deprecated"
+        patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+        patch.apply(openapi_spec, in_place=True)
+
+    if atf.get_version("sbin/slurmrestd") >= (25, 11):
+        # Ticket 23874: In 25.11 we deprecated parts_packed
+        path = base_path + "stats_msg/properties/parts_packed/deprecated"
+        patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": True}])
+        patch.apply(openapi_spec, in_place=True)
+
+        # Ticket 23874: In 25.11 we converted schedule_cycle_sum to int64
+        path = base_path + "stats_msg/properties/schedule_cycle_sum/format"
+        patch = jsonpatch.JsonPatch([{"op": "replace", "path": path, "value": "int64"}])
+        patch.apply(openapi_spec, in_place=True)
+
+    if atf.get_version("sbin/slurmrestd") >= (26, 5):
+        # This is expected to be deprecated in 26.05+
+        patch = atf.get_deprecated_openapi_spec_patch(openapi_spec)
+        patch.apply(openapi_spec, in_place=True)
+
     atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
 
 
diff --git a/testsuite/python/tests/test_112_43.py b/testsuite/python/tests/test_112_43.py
index 7d4f03d..c2d6db6 100644
--- a/testsuite/python/tests/test_112_43.py
+++ b/testsuite/python/tests/test_112_43.py
@@ -5,6 +5,7 @@
 import pytest
 import getpass
 import json
+import jsonpatch
 import random
 import logging
 import time
@@ -230,12 +231,32 @@
     assert "/slurmdb/v0.0.43/jobs/" in spec["paths"].keys()
 
 
-@pytest.mark.xfail(
-    atf.get_version("sbin/slurmrestd") >= (25, 11),
-    reason="Ticket 23807: Schema changed",
-)
 @pytest.mark.parametrize("openapi_spec", ["43"], indirect=True)
 def test_specification(openapi_spec):
+    base_path = "/components/schemas/v0.0.43_"
+
+    if atf.get_version("sbin/slurmrestd") >= (25, 5):
+        # Ticket 2450: We finally decided to revert deprecating job_submit_req script field
+        path = base_path + "job_submit_req/properties/script/deprecated"
+        patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+        patch.apply(openapi_spec, in_place=True)
+
+    if atf.get_version("sbin/slurmrestd") >= (25, 11):
+        # Ticket 23874: In 25.11 we deprecated parts_packed
+        path = base_path + "stats_msg/properties/parts_packed/deprecated"
+        patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": True}])
+        patch.apply(openapi_spec, in_place=True)
+
+        # Ticket 23874: In 25.11 we converted schedule_cycle_sum to int64
+        path = base_path + "stats_msg/properties/schedule_cycle_sum/format"
+        patch = jsonpatch.JsonPatch([{"op": "replace", "path": path, "value": "int64"}])
+        patch.apply(openapi_spec, in_place=True)
+
+    if atf.get_version("sbin/slurmrestd") >= (26, 11):
+        # This is expected to be deprecated in 26.11+
+        patch = atf.get_deprecated_openapi_spec_patch(openapi_spec)
+        patch.apply(openapi_spec, in_place=True)
+
     atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
 
 
diff --git a/testsuite/python/tests/test_112_44.py b/testsuite/python/tests/test_112_44.py
index 497d24b..5dd8d28 100644
--- a/testsuite/python/tests/test_112_44.py
+++ b/testsuite/python/tests/test_112_44.py
@@ -5,6 +5,7 @@
 import pytest
 import getpass
 import json
+import jsonpatch
 import random
 import logging
 import time
@@ -291,6 +292,19 @@
 
 @pytest.mark.parametrize("openapi_spec", ["44"], indirect=True)
 def test_specification(openapi_spec):
+    base_path = "/components/schemas/v0.0.44_"
+
+    if atf.get_version("sbin/slurmrestd") >= (25, 11):
+        # Ticket 2450: We finally decided to revert deprecating job_submit_req script field
+        path = base_path + "job_submit_req/properties/script/deprecated"
+        patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+        patch.apply(openapi_spec, in_place=True)
+
+    if atf.get_version("sbin/slurmrestd") >= (27, 5):
+        # This is expected to be deprecated in 27.05+
+        patch = atf.get_deprecated_openapi_spec_patch(openapi_spec)
+        patch.apply(openapi_spec, in_place=True)
+
     atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
 
 
diff --git a/testsuite/python/tests/test_112_45.py b/testsuite/python/tests/test_112_45.py
index 82cc32b..623ff8f 100644
--- a/testsuite/python/tests/test_112_45.py
+++ b/testsuite/python/tests/test_112_45.py
@@ -295,6 +295,11 @@
 )
 @pytest.mark.parametrize("openapi_spec", ["45"], indirect=True)
 def test_specification(openapi_spec):
+    if atf.get_version("sbin/slurmrestd") >= (27, 11):
+        # This is expected to be deprecated in 27.11+
+        patch = atf.get_deprecated_openapi_spec_patch(openapi_spec)
+        patch.apply(openapi_spec, in_place=True)
+
     atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
 
 
@@ -1264,8 +1269,8 @@
         assert job.user == local_user_name
 
     # Update job in db -- posting to /job/jobid/
-    atf.wait_for_job_accounted(jobid1, "End", fatal=True)
-    atf.wait_for_job_accounted(jobid2, "End", fatal=True)
+    atf.wait_for_job_accounted(jobid1, "State", "COMPLETED", fatal=True)
+    atf.wait_for_job_accounted(jobid2, "State", "COMPLETED", fatal=True)
 
     atf.run_command(
         f"sacctmgr -i mod user {local_cluster_name} set AdminLevel=Admin",
diff --git a/testsuite/python/tests/test_133_1.c b/testsuite/python/tests/test_133_1.c
index 6c3fb66..5008169 100644
--- a/testsuite/python/tests/test_133_1.c
+++ b/testsuite/python/tests/test_133_1.c
@@ -192,7 +192,12 @@
 		fatal("unable to split forward hostlist");
 	}
 	END_TIMER;
+#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(26, 5, 0)
+	/* In #50113 we converted to timespec_t logic with new macros */
 	et = TIMER_DURATION_USEC();
+#else
+	et = DELTA_TIMER;
+#endif
 	for (j = 0; j < hl_count; j++) {
 		hostlist_destroy(sp_hl[j]);
 	}
diff --git a/testsuite/slurm_unit/backfill/dummy_functions.c b/testsuite/slurm_unit/backfill/dummy_functions.c
index 862ad52..f361030 100644
--- a/testsuite/slurm_unit/backfill/dummy_functions.c
+++ b/testsuite/slurm_unit/backfill/dummy_functions.c
@@ -103,13 +103,15 @@
 	return 1;
 }
 
-void job_state_set(job_record_t *job_ptr, uint32_t state)
+void slurm_job_state_set(job_record_t *job_ptr, uint32_t state,
+			 const char *caller)
 {
 	debug("%s %pJ %u", __func__, job_ptr, state);
 	job_ptr->job_state = state;
 }
 
-void job_state_unset_flag(job_record_t *job_ptr, uint32_t flag)
+void slurm_job_state_unset_flag(job_record_t *job_ptr, uint32_t flag,
+				const char *caller)
 {
 	uint32_t job_state;