Merge branch 'broderick/rest-power-down-asap' into 'master'
See merge request SchedMD/dev/slurm!2702
diff --git a/doc/html/Makefile.am b/doc/html/Makefile.am
index 17f2bca..1517c44 100644
--- a/doc/html/Makefile.am
+++ b/doc/html/Makefile.am
@@ -36,7 +36,6 @@
hres.html \
jobcomp_kafka.html \
job_array.html \
- job_container_tmpfs.html \
job_exit_code.html \
job_launch.html \
job_reason_codes.html \
@@ -53,6 +52,7 @@
metrics.html \
mpi_guide.html \
multi_cluster.html \
+ namespace.html \
network.html \
nss_slurm.html \
openapi_release_notes.html \
diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in
index 4ac3600..99d2197 100644
--- a/doc/html/Makefile.in
+++ b/doc/html/Makefile.in
@@ -496,7 +496,6 @@
hres.html \
jobcomp_kafka.html \
job_array.html \
- job_container_tmpfs.html \
job_exit_code.html \
job_launch.html \
job_reason_codes.html \
@@ -513,6 +512,7 @@
metrics.html \
mpi_guide.html \
multi_cluster.html \
+ namespace.html \
network.html \
nss_slurm.html \
openapi_release_notes.html \
diff --git a/doc/html/configless_slurm.shtml b/doc/html/configless_slurm.shtml
index 3ae8a51..95fe758 100644
--- a/doc/html/configless_slurm.shtml
+++ b/doc/html/configless_slurm.shtml
@@ -153,6 +153,7 @@
<li>helpers.conf</li>
<li>job_container.conf</li>
<li>mpi.conf</li>
+<li>namespace.yaml</li>
<li>oci.conf</li>
<li>plugstack.conf</li>
<li>scrun.lua</li>
diff --git a/doc/html/documentation.shtml b/doc/html/documentation.shtml
index acda536..e02fe10 100644
--- a/doc/html/documentation.shtml
+++ b/doc/html/documentation.shtml
@@ -45,7 +45,7 @@
<li><a href="dynamic_nodes.html">Dynamic Nodes</a></li>
<li><a href="elasticsearch.html">Elasticsearch Guide</a></li>
<li><a href="jobcomp_kafka.html">Job Completion Kafka plugin Guide</a></li>
-<li><a href="job_container_tmpfs.html">job_container/tmpfs - Job Specific Temporary File Management</a></li>
+<li><a href="namespace.html">namespace/tmpfs - Job Specific Temporary File Management</a></li>
<li><a href="jwt.html">JSON Web Tokens Authentication</a></li>
<li><a href="federation.html">Federated Scheduling Guide</a></li>
<li><a href="pam_slurm_adopt.html">Job Containment (SSH Session Control) with pam_slurm_adopt</a></li>
diff --git a/doc/html/job_container_tmpfs.shtml b/doc/html/job_container_tmpfs.shtml
deleted file mode 100644
index 4e18c70..0000000
--- a/doc/html/job_container_tmpfs.shtml
+++ /dev/null
@@ -1,68 +0,0 @@
-<!--#include virtual="header.txt"-->
-
-<h1>job_container/tmpfs</h1>
-
-<h2 id="overview">Overview<a class="slurm_link" href="#overview"></a></h2>
-<p>job_container/tmpfs is an optional plugin that provides job-specific, private
-temporary file system space.</p>
-
-<p>When enabled on the cluster, a filesystem namespace will be created for each
-job with a unique, private instance of /tmp and /dev/shm for the job to use.
-These directories can be changed with the <b>Dirs=</b> option in
-job_container.conf. The contents of these directories will be removed at job
-termination.</p>
-
-<h2 id="installation">Installation
-<a class="slurm_link" href="#installation"></a>
-</h2>
-<p>This plugin is built and installed as part of the default build, no extra
-installation steps are required.</p>
-
-<h2 id="setup">Setup<a class="slurm_link" href="#setup"></a></h2>
-<p>Slurm must be configured to load the job container plugin by adding
-<b>JobContainerType=job_container/tmpfs</b> and <b>PrologFlags=contain</b> in
-slurm.conf. Additional configuration must be done in the job_container.conf
-file, which should be placed in the same directory as slurm.conf.</p>
-
-<p>Job containers can be configured for all nodes, or for a subset of nodes.
-As an example, if all nodes will be configured the same way, you would put the
-following in your job_container.conf:</p>
-
-<pre>
-AutoBasePath=true
-BasePath=/var/nvme/storage
-</pre>
-
-<p>A full description of the parameters available in the job_container.conf
-file can be found <a href="job_container.conf.html">here</a>.</p>
-
-<h2 id="initial_testing">Initial Testing
-<a class="slurm_link" href="#initial_testing"></a>
-</h2>
-<p>An easy way to verify that the container is working is to run a job and
-ensure that the /tmp directory is empty (since it normally has some other
-files) and that "." is owned by the user that submitted the job.</p>
-<pre>
-tim@slurm-ctld:~$ srun ls -al /tmp
-total 8
-drwx------ 2 tim root 4096 Feb 10 17:14 .
-drwxr-xr-x 21 root root 4096 Nov 15 08:46 ..
-</pre>
-
-<p>While a job is running, root should be able to confirm that
-<code>/$BasePath/$JobID/_tmp</code> exists and is empty. This directory is bind
-mounted into the job. <code>/$BasePath/$JobID</code> should be owned by root,
-and is not intended to be accessible to the user.</p>
-
-<h2 id="spank">SPANK<a class="slurm_link" href="#spank"></a></h2>
-<p>This plugin interfaces with the SPANK api, and automatically joins the job's
-container in the following functions:</p>
-<ul>
-<li>spank_task_init_privileged()</li>
-<li>spank_task_init()</li>
-</ul>
-
-<p>In addition to the job itself, the TaskProlog will also be executed inside
-the container.</p>
-
-<!--#include virtual="footer.txt"-->
diff --git a/doc/html/namespace.shtml b/doc/html/namespace.shtml
new file mode 100644
index 0000000..caa5224
--- /dev/null
+++ b/doc/html/namespace.shtml
@@ -0,0 +1,109 @@
+<!--#include virtual="header.txt"-->
+
+<h1>Namespace Plugins</h1>
+
+<h2 id="overview">Overview<a class="slurm_link" href="#overview"></a></h2>
+
+<p>A namespace plugin can be enabled to provide job-specific, private temporary
+file system space.</p>
+
+<p>When enabled on the cluster, a filesystem namespace will be created for each
+job with a unique, private instance of /tmp and /dev/shm for the job to use.
+These directories can be changed with the <b>Dirs=</b> option in the
+plugin-specific configuration file. The contents of these directories will be
+removed at job termination. Additionally, the <b>namespace/linux</b> plugin can
+be configured to create new PID and user namespaces.</p>
+
+<h2 id="installation">Installation
+<a class="slurm_link" href="#installation"></a>
+</h2>
+
+<p>These plugins are built and installed as part of the default build, no extra
+installation steps are required.</p>
+
+<h2 id="setup">Setup<a class="slurm_link" href="#setup"></a></h2>
+
+<p>Slurm must be configured to load the namespace plugin by adding
+<b>PrologFlags=contain</b> and setting <b>NamespaceType</b> to the desired
+plugin in slurm.conf. Additional configuration must be done in the
+plugin-specific configuration file, which should be placed in the same
+directory as slurm.conf.</p>
+
+<h3 id="linux_setup">namespace/linux plugin
+<a class="slurm_link" href="#linux_setup"></a>
+</h3>
+
+<p>The <b>namespace/linux</b> plugin (added 25.11) uses the configuration file
+<a href="namespace.yaml.html">namespace.yaml</a>. This plugin can be configured
+to create user and PID namespaces in addition to a temporary filesystem
+namespace. Namespaces can be configured for all nodes or for a subset of nodes.
+As an example, if all nodes will be configured the same way, you could put the
+following in your namespace.yaml:</p>
+
+<pre>
+defaults:
+ auto_base_path: true
+ base_path: "/var/nvme/storage"
+</pre>
+
+<p><b>Note</b> the following important details with this plugin:</p>
+
+<ul>
+<li>This plugin requires <b>cgroup/v2</b> to operate correctly.</li>
+<li>When using user namespaces, bpf token support (added in kernel 6.9) is
+ required to use <a href="cgroup.conf.html#OPT_ConstrainDevices">
+ ConstrainDevices</a> in <b>cgroup.conf</b>.</li>
+</ul>
+
+<h3 id="tmpfs_setup">namespace/tmpfs plugin
+<a class="slurm_link" href="#tmpfs_setup"></a>
+</h3>
+
+<p>The <b>namespace/tmpfs</b> plugin (formerly job_container/tmpfs) uses the
+configuration file <a href="job_container.conf.html">job_container.conf</a>.
+Namespaces can be configured for all nodes, or for a subset of nodes. As an
+example, if all nodes will be configured the same way, you could put the
+following in your job_container.conf:</p>
+
+<pre>
+AutoBasePath=true
+BasePath=/var/nvme/storage
+</pre>
+
+<h2 id="initial_testing">Initial Testing
+<a class="slurm_link" href="#initial_testing"></a>
+</h2>
+
+<p>An easy way to verify that the container is working is to run a job and
+ensure that the /tmp directory is empty (since it normally has some other
+files) and that "." is owned by the user that submitted the job.</p>
+<pre>
+tim@slurm-ctld:~$ srun ls -al /tmp
+total 8
+drwx------ 2 tim root 4096 Feb 10 17:14 .
+drwxr-xr-x 21 root root 4096 Nov 15 08:46 ..
+</pre>
+
+<p>While a job is running, root should be able to confirm that
+<code>/$BasePath/$JobID/_tmp</code> exists and is empty. This directory is bind
+mounted into the job. <code>/$BasePath/$JobID</code> should be owned by root,
+and is not intended to be accessible to the user.</p>
+
+<p>Additionally, when the <b>Linux</b> plugin is in use, you can confirm that a
+PID namespace is in effect by running a job and running "ps". The only visible
+PIDs should be related to the job and PID 1 will be named <b>slurmstepd:
+[${job_id}.namespace]</b>.</p>
+
+<h2 id="spank">SPANK<a class="slurm_link" href="#spank"></a></h2>
+
+<p>These plugins interface with the SPANK API, and automatically join the job's
+namespace in the following functions:</p>
+<ul>
+<li>spank_task_init_privileged()</li>
+<li>spank_task_init()</li>
+</ul>
+
+<p>In addition to the job itself, the TaskProlog will also be executed inside
+the container.</p>
+
+<!--#include virtual="footer.txt"-->
diff --git a/doc/html/pam_slurm_adopt.shtml b/doc/html/pam_slurm_adopt.shtml
index bcbbd78..e84b838 100644
--- a/doc/html/pam_slurm_adopt.shtml
+++ b/doc/html/pam_slurm_adopt.shtml
@@ -466,7 +466,7 @@
<b id="join_container">join_container</b>
<a class="slurm_link" href="#join_container"></a>
</dt>
-<dd>Control the interaction with the job_container/tmpfs plugin.
+<dd>Control the interaction with the namespace plugins.
Configurable values are:
<dl>
<dt></dt>
@@ -476,12 +476,12 @@
<b id="join_container_true">true</b> (default)
<a class="slurm_link" href="#join_container_true"></a>
</dt>
-<dd>Attempt to join a container created by the job_container/tmpfs plugin.</dd>
+<dd>Attempt to join a namespace created by the namespace plugins.</dd>
<dt>
<b id="join_container_false">false</b>
<a class="slurm_link" href="#join_container_false"></a>
</dt>
-<dd>Do not attempt to join a container.</dd>
+<dd>Do not attempt to join a namespace.</dd>
</dl>
</dd>
</dl>
@@ -563,7 +563,7 @@
</pre>
<p>It is possible for some plugins to require more permissions than this.
-Notably, job_container/tmpfs will require something more like this:</p>
+Notably, namespace/tmpfs will require something more like this:</p>
<pre>
module pam_slurm_adopt 1.0;
@@ -605,4 +605,8 @@
<p>When using SELinux support in Slurm, the session started via pam_slurm_adopt
won't necessarily be in the same context as the job it is associated with.</p>
+<p>When using namespace/linux and the user namespace is configured, the
+pam_limits module may not be able to set memlock, sigpending, msgqueue, nice, or
+rtprio.</p>
+
<!--#include virtual="footer.txt"-->
diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1
index 937517c..6798134 100644
--- a/doc/man/man1/sinfo.1
+++ b/doc/man/man1/sinfo.1
@@ -1038,7 +1038,9 @@
All jobs associated with this node are in the process of
COMPLETING. This node state will be removed when
all of the job's processes have terminated and the Slurm
-epilog program (if any) has terminated. See the \fBEpilog\fR
+epilog program (if any) has terminated. While a node is in
+the COMPLETING state, it is not eligible for new job placement
+and no jobs will be scheduled on it. See the \fBEpilog\fR
parameter description in the \fBslurm.conf\fR(5) man page for
more information.
.IP
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 89576e8..fdf1237 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -936,9 +936,11 @@
.TP
\fBCompleteWait\fR
-The time to wait, in seconds, when any job is in the COMPLETING state
-before any additional jobs are scheduled. This is to attempt to keep jobs on
-nodes that were recently in use, with the goal of preventing fragmentation.
+The time to wait, in seconds, when any job in the cluster is in the
+COMPLETING state before any additional jobs are scheduled. This parameter is
+evaluated at the cluster scheduling level, not per node. This is to attempt to
+keep jobs on nodes that were recently in use, with the goal of preventing
+fragmentation.
If set to zero, pending jobs will be started as soon as possible.
Since a COMPLETING job's resources are released for use by other
jobs as soon as the \fBEpilog\fR completes on each individual node,
@@ -951,6 +953,11 @@
The default value of \fBCompleteWait\fR is zero seconds.
The value may not exceed 65533.
+This parameter does not override the fact that nodes in the COMPLETING state
+are not eligible for job scheduling. Nodes will not accept new job placement
+until their Epilog completes and they leave the COMPLETING state, even if
+\fBCompleteWait\fR is set to zero.
+
\fBNOTE\fR: Setting \fBreduce_completing_frag\fR affects the behavior
of \fBCompleteWait\fR.
.IP
@@ -1479,6 +1486,11 @@
By default there is no epilog.
See \fBProlog and Epilog Scripts\fR for more information.
+While the Epilog script is running on a node, that node will be placed in the
+\fBCOMPLETING\fR state. Nodes in the COMPLETING state do not accept new job
+allocations, and no jobs will be scheduled on them until the Epilog has
+finished and the node has returned to an available state.
+
\fB\fBNOTE\fR: It is possible to configure multiple epilog scripts by including
this option on multiple lines.\fR
.IP
@@ -2799,24 +2811,29 @@
.TP
\fBNamespaceType\fR
Identifies the plugin to be used for job isolation through namespaces. To use
-these plugins, 'PrologFlags=Contain' must be set.
+these plugins, 'PrologFlags=Contain' must be set. Refer to the namespace page
+for further details: <https://slurm.schedmd.com/namespace.html>
+
\fBNOTE\fR: See \fBProctrackType\fR for resource containment and usage tracking.
+
Acceptable values at present include:
.IP
.RS
.TP 20
-\fBnamespace/tmpfs\fR
-Used to create a private namespace on the filesystem for jobs, which houses
-temporary file systems (/tmp and /dev/shm) for each job.
-.IP
-.TP
\fBnamespace/linux\fR
Used to create a private filesystem namespace and optionally user and pid
namespaces. The filesystem namespace houses temporary filesystems (/tmp and
/dev/shm) for each job.
+
\fBNOTE\fR: This plugin requires cgroup/v2 to operate correctly.
+.br
\fBNOTE\fR: When using user namespaces, bpf token support (added in kernel 6.9)
is required to use ConstrainDevices in \fBcgroup.conf\fR.
+.IP
+.TP
+\fBnamespace/tmpfs\fR
+Used to create a private namespace on the filesystem for jobs, which houses
+temporary file systems (/tmp and /dev/shm) for each job.
.RE
.IP
@@ -4543,13 +4560,6 @@
.IP
.TP
-\fBenable_job_state_cache\fR
-Enables an independent cache of job state details within slurmctld. This allows
-processing of `\fBsqueue\fR \-\-only\-job\-state` and replaced RPCs with minimal
-impact on other slurmctld operations.
-.IP
-
-.TP
\fBenable_user_top\fR
Enable use of the "scontrol top" command by non\-privileged users.
.IP
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index 8356d46..923da94 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -34,6 +34,8 @@
env.h \
extra_constraints.c \
extra_constraints.h \
+ events.c \
+ events.h \
fd.c \
fd.h \
fetch_config.c \
diff --git a/src/common/Makefile.in b/src/common/Makefile.in
index 6cf5c1e..cc509a9 100644
--- a/src/common/Makefile.in
+++ b/src/common/Makefile.in
@@ -165,11 +165,11 @@
am_libcommon_la_OBJECTS = assoc_mgr.lo bitstring.lo callerid.lo \
cbuf.lo core_array.lo cpu_frequency.lo cron.lo daemonize.lo \
data.lo dynamic_plugin_data.lo eio.lo env.lo \
- extra_constraints.lo fd.lo fetch_config.lo forward.lo \
- global_defaults.lo group_cache.lo half_duplex.lo hostlist.lo \
- http.lo http_con.lo http_mime.lo http_router.lo http_switch.lo \
- identity.lo id_util.lo io_hdr.lo job_features.lo \
- job_options.lo job_record.lo job_resources.lo \
+ extra_constraints.lo events.lo fd.lo fetch_config.lo \
+ forward.lo global_defaults.lo group_cache.lo half_duplex.lo \
+ hostlist.lo http.lo http_con.lo http_mime.lo http_router.lo \
+ http_switch.lo identity.lo id_util.lo io_hdr.lo \
+ job_features.lo job_options.lo job_record.lo job_resources.lo \
job_state_reason.lo list.lo log.lo msg_type.lo net.lo \
node_conf.lo node_features.lo oci_config.lo openapi.lo optz.lo \
pack.lo parse_config.lo parse_time.lo parse_value.lo \
@@ -221,33 +221,33 @@
./$(DEPDIR)/cpu_frequency.Plo ./$(DEPDIR)/cron.Plo \
./$(DEPDIR)/daemonize.Plo ./$(DEPDIR)/data.Plo \
./$(DEPDIR)/dynamic_plugin_data.Plo ./$(DEPDIR)/eio.Plo \
- ./$(DEPDIR)/env.Plo ./$(DEPDIR)/extra_constraints.Plo \
- ./$(DEPDIR)/fd.Plo ./$(DEPDIR)/fetch_config.Plo \
- ./$(DEPDIR)/forward.Plo ./$(DEPDIR)/global_defaults.Plo \
- ./$(DEPDIR)/group_cache.Plo ./$(DEPDIR)/half_duplex.Plo \
- ./$(DEPDIR)/hostlist.Plo ./$(DEPDIR)/http.Plo \
- ./$(DEPDIR)/http_con.Plo ./$(DEPDIR)/http_mime.Plo \
- ./$(DEPDIR)/http_router.Plo ./$(DEPDIR)/http_switch.Plo \
- ./$(DEPDIR)/id_util.Plo ./$(DEPDIR)/identity.Plo \
- ./$(DEPDIR)/io_hdr.Plo ./$(DEPDIR)/job_features.Plo \
- ./$(DEPDIR)/job_options.Plo ./$(DEPDIR)/job_record.Plo \
- ./$(DEPDIR)/job_resources.Plo ./$(DEPDIR)/job_state_reason.Plo \
- ./$(DEPDIR)/list.Plo ./$(DEPDIR)/log.Plo \
- ./$(DEPDIR)/msg_type.Plo ./$(DEPDIR)/net.Plo \
- ./$(DEPDIR)/node_conf.Plo ./$(DEPDIR)/node_features.Plo \
- ./$(DEPDIR)/oci_config.Plo ./$(DEPDIR)/openapi.Plo \
- ./$(DEPDIR)/optz.Plo ./$(DEPDIR)/pack.Plo \
- ./$(DEPDIR)/parse_config.Plo ./$(DEPDIR)/parse_time.Plo \
- ./$(DEPDIR)/parse_value.Plo ./$(DEPDIR)/part_record.Plo \
- ./$(DEPDIR)/persist_conn.Plo ./$(DEPDIR)/plugin.Plo \
- ./$(DEPDIR)/plugrack.Plo ./$(DEPDIR)/port_mgr.Plo \
- ./$(DEPDIR)/print_fields.Plo ./$(DEPDIR)/probes.Plo \
- ./$(DEPDIR)/proc_args.Plo ./$(DEPDIR)/read_config.Plo \
- ./$(DEPDIR)/reverse_tree.Plo ./$(DEPDIR)/run_command.Plo \
- ./$(DEPDIR)/run_in_daemon.Plo ./$(DEPDIR)/sack_api.Plo \
- ./$(DEPDIR)/setproctitle.Plo ./$(DEPDIR)/sluid.Plo \
- ./$(DEPDIR)/slurm_errno.Plo ./$(DEPDIR)/slurm_opt.Plo \
- ./$(DEPDIR)/slurm_protocol_api.Plo \
+ ./$(DEPDIR)/env.Plo ./$(DEPDIR)/events.Plo \
+ ./$(DEPDIR)/extra_constraints.Plo ./$(DEPDIR)/fd.Plo \
+ ./$(DEPDIR)/fetch_config.Plo ./$(DEPDIR)/forward.Plo \
+ ./$(DEPDIR)/global_defaults.Plo ./$(DEPDIR)/group_cache.Plo \
+ ./$(DEPDIR)/half_duplex.Plo ./$(DEPDIR)/hostlist.Plo \
+ ./$(DEPDIR)/http.Plo ./$(DEPDIR)/http_con.Plo \
+ ./$(DEPDIR)/http_mime.Plo ./$(DEPDIR)/http_router.Plo \
+ ./$(DEPDIR)/http_switch.Plo ./$(DEPDIR)/id_util.Plo \
+ ./$(DEPDIR)/identity.Plo ./$(DEPDIR)/io_hdr.Plo \
+ ./$(DEPDIR)/job_features.Plo ./$(DEPDIR)/job_options.Plo \
+ ./$(DEPDIR)/job_record.Plo ./$(DEPDIR)/job_resources.Plo \
+ ./$(DEPDIR)/job_state_reason.Plo ./$(DEPDIR)/list.Plo \
+ ./$(DEPDIR)/log.Plo ./$(DEPDIR)/msg_type.Plo \
+ ./$(DEPDIR)/net.Plo ./$(DEPDIR)/node_conf.Plo \
+ ./$(DEPDIR)/node_features.Plo ./$(DEPDIR)/oci_config.Plo \
+ ./$(DEPDIR)/openapi.Plo ./$(DEPDIR)/optz.Plo \
+ ./$(DEPDIR)/pack.Plo ./$(DEPDIR)/parse_config.Plo \
+ ./$(DEPDIR)/parse_time.Plo ./$(DEPDIR)/parse_value.Plo \
+ ./$(DEPDIR)/part_record.Plo ./$(DEPDIR)/persist_conn.Plo \
+ ./$(DEPDIR)/plugin.Plo ./$(DEPDIR)/plugrack.Plo \
+ ./$(DEPDIR)/port_mgr.Plo ./$(DEPDIR)/print_fields.Plo \
+ ./$(DEPDIR)/probes.Plo ./$(DEPDIR)/proc_args.Plo \
+ ./$(DEPDIR)/read_config.Plo ./$(DEPDIR)/reverse_tree.Plo \
+ ./$(DEPDIR)/run_command.Plo ./$(DEPDIR)/run_in_daemon.Plo \
+ ./$(DEPDIR)/sack_api.Plo ./$(DEPDIR)/setproctitle.Plo \
+ ./$(DEPDIR)/sluid.Plo ./$(DEPDIR)/slurm_errno.Plo \
+ ./$(DEPDIR)/slurm_opt.Plo ./$(DEPDIR)/slurm_protocol_api.Plo \
./$(DEPDIR)/slurm_protocol_defs.Plo \
./$(DEPDIR)/slurm_protocol_pack.Plo \
./$(DEPDIR)/slurm_protocol_socket.Plo \
@@ -604,6 +604,8 @@
env.h \
extra_constraints.c \
extra_constraints.h \
+ events.c \
+ events.h \
fd.c \
fd.h \
fetch_config.c \
@@ -862,6 +864,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dynamic_plugin_data.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eio.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/events.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/extra_constraints.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fd.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fetch_config.Plo@am__quote@ # am--include-marker
@@ -1090,6 +1093,7 @@
-rm -f ./$(DEPDIR)/dynamic_plugin_data.Plo
-rm -f ./$(DEPDIR)/eio.Plo
-rm -f ./$(DEPDIR)/env.Plo
+ -rm -f ./$(DEPDIR)/events.Plo
-rm -f ./$(DEPDIR)/extra_constraints.Plo
-rm -f ./$(DEPDIR)/fd.Plo
-rm -f ./$(DEPDIR)/fetch_config.Plo
@@ -1238,6 +1242,7 @@
-rm -f ./$(DEPDIR)/dynamic_plugin_data.Plo
-rm -f ./$(DEPDIR)/eio.Plo
-rm -f ./$(DEPDIR)/env.Plo
+ -rm -f ./$(DEPDIR)/events.Plo
-rm -f ./$(DEPDIR)/extra_constraints.Plo
-rm -f ./$(DEPDIR)/fd.Plo
-rm -f ./$(DEPDIR)/fetch_config.Plo
diff --git a/src/conmgr/events.c b/src/common/events.c
similarity index 86%
rename from src/conmgr/events.c
rename to src/common/events.c
index b094fcf..ec1dd18 100644
--- a/src/conmgr/events.c
+++ b/src/common/events.c
@@ -35,17 +35,16 @@
#include <pthread.h>
+#include "src/common/events.h"
#include "src/common/macros.h"
#include "src/common/read_config.h"
#include "src/common/timers.h"
#include "src/common/xassert.h"
#include <time.h>
-#include "src/conmgr/events.h"
-
static void _wait_pending(event_signal_t *event, const char *caller)
{
- log_flag(CONMGR, "%s->%s: [EVENT:%s] wait skipped due to %d pending reliable signals",
+ log_flag(THREAD, "%s->%s: [EVENT:%s] wait skipped due to %d pending reliable signals",
caller, __func__, event->name, event->pending);
xassert(!event->waiting);
@@ -60,10 +59,10 @@
{
DEF_TIMERS;
- if (slurm_conf.debug_flags & DEBUG_FLAG_CONMGR) {
+ if (slurm_conf.debug_flags & DEBUG_FLAG_THREAD) {
START_TIMER;
- log_flag(CONMGR, "%s->%s: [EVENT:%s] BEGIN wait with %d other waiters",
+ log_flag(THREAD, "%s->%s: [EVENT:%s] BEGIN wait with %d other waiters",
caller, __func__, event->name, event->waiting);
}
@@ -81,11 +80,11 @@
xassert(event->waiting >= 0);
xassert(!event->pending);
- if (slurm_conf.debug_flags & DEBUG_FLAG_CONMGR) {
+ if (slurm_conf.debug_flags & DEBUG_FLAG_THREAD) {
/* we want the time but not to warn about a time limit */
- END_TIMER3(NULL, 0);
+ END_TIMER;
- log_flag(CONMGR, "%s->%s: [EVENT:%s] END waited after %s with %d other pending waiters",
+ log_flag(THREAD, "%s->%s: [EVENT:%s] END waited after %s with %d other pending waiters",
caller, __func__, event->name, TIMER_STR(),
event->waiting);
}
@@ -103,7 +102,7 @@
static void _broadcast(event_signal_t *event, const char *caller)
{
if (!event->waiting) {
- log_flag(CONMGR, "%s->%s: [EVENT:%s] broadcast skipped due to 0 waiters with %d pending signals",
+ log_flag(THREAD, "%s->%s: [EVENT:%s] broadcast skipped due to 0 waiters with %d pending signals",
caller, __func__, event->name, event->pending);
return;
}
@@ -111,7 +110,7 @@
/* can't have pending signals when there are waiters */
xassert(!event->pending);
- log_flag(CONMGR, "%s->%s: [EVENT:%s] broadcasting to all %d waiters",
+ log_flag(THREAD, "%s->%s: [EVENT:%s] broadcasting to all %d waiters",
caller, __func__, event->name, event->pending);
slurm_cond_broadcast(&event->cond);
@@ -122,7 +121,7 @@
/* can't have pending signals when there are waiters */
xassert(!event->pending);
- log_flag(CONMGR, "%s->%s: [EVENT:%s] sending signal to 1/%d waiters",
+ log_flag(THREAD, "%s->%s: [EVENT:%s] sending signal to 1/%d waiters",
caller, __func__, event->name, event->waiting);
slurm_cond_signal(&event->cond);
@@ -133,10 +132,10 @@
xassert(event->pending >= 0);
if (event->pending) {
- log_flag(CONMGR, "%s->%s: [EVENT:%s] skipping signal to 0 waiters with %d signals pending",
+ log_flag(THREAD, "%s->%s: [EVENT:%s] skipping signal to 0 waiters with %d signals pending",
caller, __func__, event->name, event->pending);
} else {
- log_flag(CONMGR, "%s->%s: [EVENT:%s] enqueuing signal to 0 waiters with 0 signals pending",
+ log_flag(THREAD, "%s->%s: [EVENT:%s] enqueuing signal to 0 waiters with 0 signals pending",
caller, __func__, event->name);
event->pending++;
}
diff --git a/src/conmgr/events.h b/src/common/events.h
similarity index 92%
rename from src/conmgr/events.h
rename to src/common/events.h
index 07a82d2..33e3622 100644
--- a/src/conmgr/events.h
+++ b/src/common/events.h
@@ -37,8 +37,8 @@
* Named event based signaling and waiting
*/
-#ifndef _CONMGR_EVENTS_H
-#define _CONMGR_EVENTS_H
+#ifndef _SLURM_EVENTS_H
+#define _SLURM_EVENTS_H
#include <pthread.h>
#include <stdbool.h>
@@ -65,9 +65,9 @@
}
#define EVENT_FREE_MEMBERS(event) \
-do { \
- slurm_cond_destroy(&((event)->cond)); \
-} while (false)
+ do { \
+ slurm_cond_destroy(&((event)->cond)); \
+ } while (false)
/*
* Wait (aka block) for a signal for a given event
@@ -92,7 +92,7 @@
* IN mutex - the mutex controlling event->cond
*/
#define EVENT_WAIT(event, mutex) \
- event_wait_now(event, mutex, (struct timespec) {0}, __func__)
+ event_wait_now(event, mutex, (struct timespec) { 0 }, __func__)
/*
* Wait (aka block) for a signal for a given event
@@ -122,13 +122,11 @@
* Send signal to one waiter even if EVENT_WAIT() called later but drop signal
* if there is already another reliable signal pending a waiter.
*/
-#define EVENT_SIGNAL(event) \
- event_signal_now(false, event, __func__)
+#define EVENT_SIGNAL(event) event_signal_now(false, event, __func__)
/*
* Send signal to all currently waiting threads or drop signal if there are no
* currently waiting threads.
*/
-#define EVENT_BROADCAST(event) \
- event_signal_now(true, event, __func__)
+#define EVENT_BROADCAST(event) event_signal_now(true, event, __func__)
-#endif /* _CONMGR_EVENTS_H */
+#endif
diff --git a/src/common/macros.h b/src/common/macros.h
index 45598c0..b61d535 100644
--- a/src/common/macros.h
+++ b/src/common/macros.h
@@ -286,12 +286,12 @@
/* Get __typeof__(s->f) */
#define STRUCT_FIELD_TYPEOF(s, f) __typeof__(((s *) (NULL))->f)
-#define SWAP(x, y) \
-do { \
- __typeof__(x) b = x; \
- x = y; \
- y = b; \
-} while (0)
+#define SWAP(x, y) \
+ do { \
+ __typeof__(x) b = (x); \
+ (x) = (y); \
+ (y) = b; \
+ } while (0)
/* macro to force stringification */
#define XSTRINGIFY(s) XSTRINGIFY2(s)
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 35fc4c5..d38076e 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -2861,6 +2861,11 @@
/* Malloc space ahead of time to avoid realloc inside of xstrcat. */
char *state_str = xmalloc(100);
+ if (state == NO_VAL) {
+ xstrcat(state_str, "UNINITIALISED");
+ return state_str;
+ }
+
/* Process JOB_STATE_BASE */
switch (state & JOB_STATE_BASE) {
case JOB_PENDING:
diff --git a/src/common/slurm_time.c b/src/common/slurm_time.c
index 380c957..a199a7f 100644
--- a/src/common/slurm_time.c
+++ b/src/common/slurm_time.c
@@ -155,13 +155,17 @@
if (!ts.tv_nsec && !ts.tv_sec) {
return snprintf(buffer, buffer_len, "%s",
- (abs_time ? "now" : "None"));
+ (abs_time ? "NEVER" : "now"));
}
ts = timespec_normalize(ts);
if (abs_time) {
- ts = timespec_normalize(timespec_rem(ts, timespec_now()));
+ const timespec_diff_ns_t tdiff =
+ timespec_diff_ns(timespec_now(), ts);
+
+ ts = tdiff.diff;
+ negative = tdiff.after;
if (!ts.tv_nsec && !ts.tv_sec) {
return snprintf(buffer, buffer_len, "now");
@@ -169,7 +173,7 @@
}
/* Force positive time */
- if (ts.tv_sec < 0) {
+ if ((ts.tv_sec < 0) || (ts.tv_nsec < 0)) {
negative = true;
ts.tv_sec *= -1;
@@ -242,25 +246,39 @@
return wrote;
}
-extern timespec_t timespec_normalize(timespec_t ts)
+/* Normalize nsec to less than a second */
+static timespec_t _normalize(timespec_t ts)
{
- /* Force direction of time to be uniform */
- if ((ts.tv_nsec < 0) && (ts.tv_sec > 0)) {
- ts.tv_sec++;
- ts.tv_nsec = NSEC_IN_SEC + ts.tv_nsec;
- } else if ((ts.tv_nsec > 0) && (ts.tv_sec < 0)) {
- ts.tv_sec--;
- ts.tv_nsec = NSEC_IN_SEC - ts.tv_nsec;
- }
-
return (timespec_t) {
.tv_sec = ts.tv_sec + (ts.tv_nsec / NSEC_IN_SEC),
.tv_nsec = (ts.tv_nsec % NSEC_IN_SEC),
};
}
-extern timespec_t timespec_add(const timespec_t x, const timespec_t y)
+/* Force direction of time to be uniform */
+static timespec_t _uniform(timespec_t ts)
{
+ if ((ts.tv_nsec < 0) && (ts.tv_sec > 0)) {
+ ts.tv_sec--;
+ ts.tv_nsec = (NSEC_IN_SEC + ts.tv_nsec);
+ } else if ((ts.tv_nsec > 0) && (ts.tv_sec < 0)) {
+ ts.tv_sec++;
+		ts.tv_nsec = (ts.tv_nsec - NSEC_IN_SEC);
+ }
+
+ return ts;
+}
+
+extern timespec_t timespec_normalize(timespec_t ts)
+{
+ return _normalize(_uniform(_normalize(ts)));
+}
+
+extern timespec_t timespec_add(timespec_t x, timespec_t y)
+{
+ x = timespec_normalize(x);
+ y = timespec_normalize(y);
+
/* Use 64bit accumulators to avoid overflow */
return timespec_normalize((timespec_t) {
.tv_sec = (((uint64_t) x.tv_sec) + ((uint64_t) y.tv_sec)),
@@ -268,25 +286,14 @@
});
}
-extern timespec_t timespec_rem(const timespec_t x, const timespec_t y)
+extern timespec_t timespec_rem(timespec_t x, timespec_t y)
{
- /* Use 64bit accumulators to avoid underflow */
- int64_t s = (((uint64_t) x.tv_sec) - ((uint64_t) y.tv_sec));
- int64_t ns = (((uint64_t) x.tv_nsec) - ((uint64_t) y.tv_nsec));
-
- /* reject underflow of time */
- if (s <= 0)
- return (timespec_t) {0};
-
- /* force ns to be positive */
- if (ns < 0) {
- s--;
- ns = NSEC_IN_SEC - ns;
- }
+ x = timespec_normalize(x);
+ y = timespec_normalize(y);
return timespec_normalize((timespec_t) {
- .tv_sec = s,
- .tv_nsec = ns,
+ .tv_sec = (((uint64_t) x.tv_sec) - ((uint64_t) y.tv_sec)),
+ .tv_nsec = (((uint64_t) x.tv_nsec) - ((uint64_t) y.tv_nsec)),
});
}
@@ -309,31 +316,20 @@
extern timespec_diff_ns_t timespec_diff_ns(const timespec_t x,
const timespec_t y)
{
- /* Use 64bit accumulators to catch underflows */
- int64_t s = (((int64_t) x.tv_sec) - ((int64_t) y.tv_sec));
- int64_t ns = (((int64_t) x.tv_nsec) - ((int64_t) y.tv_nsec));
+ timespec_t ts = timespec_rem(x, y);
- /* Adjust positive nanoseconds if seconds is negative */
- if ((ns > 0) && (s < 0)) {
- s += 1;
- ns -= NSEC_IN_SEC;
- }
-
- if (s < 0)
+ if ((ts.tv_sec < 0) || (ts.tv_nsec < 0))
return (timespec_diff_ns_t) {
.after = false,
- .diff = {
- .tv_sec = (-1 * s),
- .tv_nsec = (-1 * ns),
- },
+ .diff = ((timespec_t) {
+ .tv_sec = (-1 * ts.tv_sec),
+ .tv_nsec = (-1 * ts.tv_nsec),
+ }),
};
else
return (timespec_diff_ns_t) {
.after = true,
- .diff = {
- .tv_sec = s,
- .tv_nsec = ns,
- },
+ .diff = ts,
};
}
diff --git a/src/common/slurm_time.h b/src/common/slurm_time.h
index aa6ad5d..abe1d29 100644
--- a/src/common/slurm_time.h
+++ b/src/common/slurm_time.h
@@ -122,10 +122,10 @@
extern timespec_t timespec_normalize(timespec_t ts);
/* Add timestamp X to timestamp Y */
-extern timespec_t timespec_add(const timespec_t x, const timespec_t y);
+extern timespec_t timespec_add(timespec_t x, timespec_t y);
/* Subtract timestamp Y from timestamp X */
-extern timespec_t timespec_rem(const timespec_t x, const timespec_t y);
+extern timespec_t timespec_rem(timespec_t x, timespec_t y);
/* Is timestamp X after timestamp Y */
extern bool timespec_is_after(const timespec_t x, const timespec_t y);
diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c
index b9c5274..b0c5bf9 100644
--- a/src/common/stepd_api.c
+++ b/src/common/stepd_api.c
@@ -1443,25 +1443,14 @@
return SLURM_ERROR;
}
-/*
- * Get the memory limits of the step
- * Returns uid of the running step if successful. On error returns -1.
- */
-extern int stepd_get_mem_limits(int fd, uint16_t protocol_version,
- slurmstepd_mem_info_t *stepd_mem_info)
+extern int stepd_get_mem_limit(int fd, uint16_t protocol_version,
+ uint64_t *job_mem_limit)
{
int req = REQUEST_STEP_MEM_LIMITS;
- xassert(stepd_mem_info);
- memset(stepd_mem_info, 0, sizeof(slurmstepd_mem_info_t));
+ safe_write(fd, &req, sizeof(int));
- if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
- safe_write(fd, &req, sizeof(int));
-
- safe_read(fd, &stepd_mem_info->job_mem_limit, sizeof(uint32_t));
- safe_read(fd, &stepd_mem_info->step_mem_limit,
- sizeof(uint32_t));
- }
+ safe_read(fd, job_mem_limit, sizeof(*job_mem_limit));
return SLURM_SUCCESS;
rwfail:
diff --git a/src/common/stepd_api.h b/src/common/stepd_api.h
index 7fb8901..0ae997a 100644
--- a/src/common/stepd_api.h
+++ b/src/common/stepd_api.h
@@ -316,10 +316,9 @@
/*
* Get the memory limits of the step
- * Returns uid of the running step if successful. On error returns -1.
*/
-extern int stepd_get_mem_limits(int fd, uint16_t protocol_version,
- slurmstepd_mem_info_t *stepd_mem_info);
+extern int stepd_get_mem_limit(int fd, uint16_t protocol_version,
+ uint64_t *job_mem_limit);
/*
* Get the uid of the step
diff --git a/src/common/threadpool.c b/src/common/threadpool.c
index d66f1a0..c02203b 100644
--- a/src/common/threadpool.c
+++ b/src/common/threadpool.c
@@ -141,7 +141,13 @@
static void _set_thread_name(const char *name)
{
#if HAVE_SYS_PRCTL_H
- xassert(strlen(name) < PRCTL_BUF_BYTES);
+
+#ifndef NDEBUG
+ if (strlen(name) >= PRCTL_BUF_BYTES)
+ warning("Thread name truncated[%zu/%zu]: %s",
+ strlen(name), (size_t) PRCTL_BUF_BYTES,
+ name);
+#endif
if (prctl(PR_SET_NAME, name, NULL, NULL, NULL))
error("%s: cannot set process name to %s %m", __func__, name);
@@ -150,13 +156,11 @@
static void _thread_free(thread_t *thread)
{
-#ifdef MEMORY_LEAK_DEBUG
xassert(thread);
xassert(thread->magic == THREAD_MAGIC);
thread->magic = ~THREAD_MAGIC;
xfree(thread);
-#endif
}
static void *_thread(void *arg)
diff --git a/src/conmgr/Makefile.am b/src/conmgr/Makefile.am
index f385152..21bf61d 100644
--- a/src/conmgr/Makefile.am
+++ b/src/conmgr/Makefile.am
@@ -13,8 +13,6 @@
conmgr.h \
delayed.c \
delayed.h \
- events.c \
- events.h \
io.c \
mgr.h \
poll.c \
diff --git a/src/conmgr/Makefile.in b/src/conmgr/Makefile.in
index 54f88fc..32c0bb2 100644
--- a/src/conmgr/Makefile.in
+++ b/src/conmgr/Makefile.in
@@ -163,8 +163,8 @@
LTLIBRARIES = $(noinst_LTLIBRARIES)
libconmgr_la_LIBADD =
@HAVE_EPOLL_TRUE@am__objects_1 = epoll.lo
-am_libconmgr_la_OBJECTS = con.lo conmgr.lo delayed.lo events.lo io.lo \
- poll.lo polling.lo rpc.lo signals.lo tls.lo tls_fingerprint.lo \
+am_libconmgr_la_OBJECTS = con.lo conmgr.lo delayed.lo io.lo poll.lo \
+ polling.lo rpc.lo signals.lo tls.lo tls_fingerprint.lo \
watch.lo work.lo workers.lo $(am__objects_1)
libconmgr_la_OBJECTS = $(am_libconmgr_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
@@ -194,7 +194,7 @@
am__maybe_remake_depfiles = depfiles
am__depfiles_remade = ./$(DEPDIR)/con.Plo ./$(DEPDIR)/conmgr.Plo \
./$(DEPDIR)/delayed.Plo ./$(DEPDIR)/epoll.Plo \
- ./$(DEPDIR)/events.Plo ./$(DEPDIR)/io.Plo ./$(DEPDIR)/poll.Plo \
+ ./$(DEPDIR)/io.Plo ./$(DEPDIR)/poll.Plo \
./$(DEPDIR)/polling.Plo ./$(DEPDIR)/rpc.Plo \
./$(DEPDIR)/signals.Plo ./$(DEPDIR)/tls.Plo \
./$(DEPDIR)/tls_fingerprint.Plo ./$(DEPDIR)/watch.Plo \
@@ -505,9 +505,9 @@
AM_CPPFLAGS = -I$(top_srcdir)
noinst_LTLIBRARIES = libconmgr.la
libconmgr_la_SOURCES = con.c conmgr.c conmgr.h delayed.c delayed.h \
- events.c events.h io.c mgr.h poll.c polling.c polling.h rpc.c \
- signals.c signals.h tls.c tls.h tls_fingerprint.c \
- tls_fingerprint.h watch.c work.c workers.c $(am__append_1)
+ io.c mgr.h poll.c polling.c polling.h rpc.c signals.c \
+ signals.h tls.c tls.h tls_fingerprint.c tls_fingerprint.h \
+ watch.c work.c workers.c $(am__append_1)
libconmgr_la_LDFLAGS = $(LIB_LDFLAGS) -module --export-dynamic
# This was made so we could export all symbols from libconmgr
@@ -573,7 +573,6 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conmgr.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/delayed.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/epoll.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/events.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/io.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poll.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/polling.Plo@am__quote@ # am--include-marker
@@ -713,7 +712,6 @@
-rm -f ./$(DEPDIR)/conmgr.Plo
-rm -f ./$(DEPDIR)/delayed.Plo
-rm -f ./$(DEPDIR)/epoll.Plo
- -rm -f ./$(DEPDIR)/events.Plo
-rm -f ./$(DEPDIR)/io.Plo
-rm -f ./$(DEPDIR)/poll.Plo
-rm -f ./$(DEPDIR)/polling.Plo
@@ -773,7 +771,6 @@
-rm -f ./$(DEPDIR)/conmgr.Plo
-rm -f ./$(DEPDIR)/delayed.Plo
-rm -f ./$(DEPDIR)/epoll.Plo
- -rm -f ./$(DEPDIR)/events.Plo
-rm -f ./$(DEPDIR)/io.Plo
-rm -f ./$(DEPDIR)/poll.Plo
-rm -f ./$(DEPDIR)/polling.Plo
diff --git a/src/conmgr/epoll.c b/src/conmgr/epoll.c
index bee7030..56c0f27 100644
--- a/src/conmgr/epoll.c
+++ b/src/conmgr/epoll.c
@@ -42,6 +42,7 @@
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
+#include "src/common/events.h"
#include "src/common/fd.h"
#include "src/common/log.h"
#include "src/common/macros.h"
@@ -52,7 +53,6 @@
#include "src/common/xstring.h"
#include "src/conmgr/polling.h"
-#include "src/conmgr/events.h"
/*
* Size event count for 1 input and 1 output per connection and
@@ -607,7 +607,7 @@
safe_write(fd, buf, 1);
if (slurm_conf.debug_flags & DEBUG_FLAG_CONMGR) {
- END_TIMER3(NULL, 0);
+ END_TIMER;
log_flag(CONMGR, "%s->%s: [EPOLL] interrupt byte sent in %s",
caller, __func__, TIMER_STR());
}
diff --git a/src/conmgr/mgr.h b/src/conmgr/mgr.h
index b64bb82..02b59da 100644
--- a/src/conmgr/mgr.h
+++ b/src/conmgr/mgr.h
@@ -50,11 +50,11 @@
#include "slurm/slurm.h"
+#include "src/common/events.h"
#include "src/common/pack.h"
#include "src/common/probes.h"
#include "src/conmgr/conmgr.h"
-#include "src/conmgr/events.h"
#include "src/conmgr/polling.h"
/* Default buffer to 1 page */
diff --git a/src/conmgr/poll.c b/src/conmgr/poll.c
index a6658a1..c1223ce 100644
--- a/src/conmgr/poll.c
+++ b/src/conmgr/poll.c
@@ -43,6 +43,7 @@
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
+#include "src/common/events.h"
#include "src/common/fd.h"
#include "src/common/log.h"
#include "src/common/macros.h"
@@ -52,7 +53,6 @@
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
-#include "src/conmgr/events.h"
#include "src/conmgr/polling.h"
/*
@@ -643,7 +643,7 @@
safe_write(fd, buf, 1);
if (slurm_conf.debug_flags & DEBUG_FLAG_CONMGR) {
- END_TIMER3(NULL, 0);
+ END_TIMER;
log_flag(CONMGR, "%s->%s: [POLL] interrupt byte sent in %s",
caller, __func__, TIMER_STR());
}
diff --git a/src/conmgr/watch.c b/src/conmgr/watch.c
index cc24b4a..0da6919 100644
--- a/src/conmgr/watch.c
+++ b/src/conmgr/watch.c
@@ -48,6 +48,7 @@
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
+#include "src/common/events.h"
#include "src/common/fd.h"
#include "src/common/list.h"
#include "src/common/macros.h"
@@ -59,7 +60,6 @@
#include "src/conmgr/conmgr.h"
#include "src/conmgr/delayed.h"
-#include "src/conmgr/events.h"
#include "src/conmgr/mgr.h"
#include "src/conmgr/polling.h"
#include "src/conmgr/signals.h"
diff --git a/src/conmgr/work.c b/src/conmgr/work.c
index 640569b..42e5d57 100644
--- a/src/conmgr/work.c
+++ b/src/conmgr/work.c
@@ -33,6 +33,7 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
+#include "src/common/events.h"
#include "src/common/list.h"
#include "src/common/macros.h"
#include "src/common/proc_args.h"
@@ -42,7 +43,6 @@
#include "src/conmgr/conmgr.h"
#include "src/conmgr/delayed.h"
-#include "src/conmgr/events.h"
#include "src/conmgr/mgr.h"
#include "src/conmgr/signals.h"
diff --git a/src/conmgr/workers.c b/src/conmgr/workers.c
index c7b1933..a493658 100644
--- a/src/conmgr/workers.c
+++ b/src/conmgr/workers.c
@@ -35,6 +35,7 @@
#include "slurm/slurm_errno.h"
+#include "src/common/events.h"
#include "src/common/macros.h"
#include "src/common/probes.h"
#include "src/common/read_config.h"
@@ -45,7 +46,6 @@
#include "src/common/xsched.h"
#include "src/conmgr/conmgr.h"
-#include "src/conmgr/events.h"
#include "src/conmgr/mgr.h"
/* Limit automatically set default thread count */
diff --git a/src/plugins/auth/slurm/sack.c b/src/plugins/auth/slurm/sack.c
index 44db36d..8f020f1 100644
--- a/src/plugins/auth/slurm/sack.c
+++ b/src/plugins/auth/slurm/sack.c
@@ -365,9 +365,12 @@
extern int auth_p_get_reconfig_fd(void)
{
- /* Prepare for reconfigure */
- setenvf(NULL, SACK_RECONFIG_ENV, "%d", sack_fd);
- fd_set_noclose_on_exec(sack_fd);
+ if (sack_fd >= 0) {
+ /* Prepare for reconfigure */
+ setenvf(NULL, SACK_RECONFIG_ENV, "%d", sack_fd);
+ fd_set_noclose_on_exec(sack_fd);
+ }
+ /* sack_fd is init'd to -1, so it won't be registered to skip close() later. */
return sack_fd;
}
diff --git a/src/plugins/namespace/linux/namespace_linux.c b/src/plugins/namespace/linux/namespace_linux.c
index 1eb1726..616ca61 100644
--- a/src/plugins/namespace/linux/namespace_linux.c
+++ b/src/plugins/namespace/linux/namespace_linux.c
@@ -80,6 +80,7 @@
static ns_conf_t *ns_conf = NULL;
static bool plugin_disabled = false;
+static pid_t ns_pid = -1;
/* NS_L_NS must be last */
enum ns_l_types {
@@ -181,7 +182,7 @@
return _delete_ns(job_id);
}
- close(fd);
+ fd_close(&fd);
return SLURM_SUCCESS;
}
@@ -212,10 +213,7 @@
#ifdef MEMORY_LEAK_DEBUG
for (int i = 0; i < NS_L_END; i++) {
xfree(ns_l_enabled[i].path);
- if (ns_l_enabled[i].fd >= 0) {
- close(ns_l_enabled[i].fd);
- ns_l_enabled[i].fd = -1;
- }
+ fd_close(&ns_l_enabled[i].fd);
}
free_ns_conf();
#endif
@@ -551,7 +549,7 @@
static int _clonens_user_setup(stepd_step_rec_t *step, pid_t pid)
{
- int fd = 0, rc = SLURM_SUCCESS;
+ int fd = -1, rc = SLURM_SUCCESS;
char *tmpstr = NULL;
if (!ns_l_enabled[NS_L_USER].enabled)
@@ -596,8 +594,7 @@
rc = SLURM_ERROR;
goto end_it;
}
- if (fd >= 0)
- close(fd);
+ fd_close(&fd);
xfree(tmpstr);
xstrfmtcat(tmpstr, "/proc/%d/gid_map", pid);
@@ -614,8 +611,7 @@
}
end_it:
- if (fd >= 0)
- close(fd);
+ fd_close(&fd);
xfree(tmpstr);
return rc;
}
@@ -630,7 +626,6 @@
unsigned long tls = 0;
sem_t *sem1 = NULL;
sem_t *sem2 = NULL;
- pid_t cpid;
_create_paths(step->step_id.job_id, &job_mount, &ns_base, &src_bind);
@@ -675,7 +670,7 @@
rc = -1;
goto exit2;
}
- close(fd);
+ fd_close(&fd);
}
/* Create location for bind mounts to go */
@@ -744,14 +739,14 @@
error("%s: sem_init: %m", __func__);
goto exit1;
}
- cpid = sys_clone(ns_conf->clonensflags|SIGCHLD, &parent_tid,
- &child_tid, tls);
+ ns_pid = sys_clone(ns_conf->clonensflags | SIGCHLD, &parent_tid,
+ &child_tid, tls);
- if (cpid == -1) {
+ if (ns_pid == -1) {
error("%s: sys_clone failed: %m", __func__);
rc = -1;
goto exit1;
- } else if (cpid == 0) {
+ } else if (ns_pid == 0) {
_create_ns_child(step, src_bind, job_mount, sem1, sem2);
} else {
char *proc_path = NULL;
@@ -763,7 +758,7 @@
for (int i = 0; i < NS_L_END; i++) {
if (!ns_l_enabled[i].enabled)
continue;
- xstrfmtcat(proc_path, "/proc/%u/ns/%s", cpid,
+ xstrfmtcat(proc_path, "/proc/%u/ns/%s", ns_pid,
ns_l_enabled[i].proc_name);
rc = mount(proc_path, ns_l_enabled[i].path, NULL,
MS_BIND, NULL);
@@ -780,7 +775,7 @@
}
/* setup users before setting up the rest of the container */
- if ((rc = _clonens_user_setup(step, cpid))) {
+ if ((rc = _clonens_user_setup(step, ns_pid))) {
error("%s: Unable to prepare user namespace.",
__func__);
/* error needs to fall though here */
@@ -799,9 +794,9 @@
goto exit1;
}
- if (proctrack_g_add(step, cpid) != SLURM_SUCCESS) {
+ if (proctrack_g_add(step, ns_pid) != SLURM_SUCCESS) {
error("%s: Job %u can't add pid %d to proctrack plugin in the extern_step.",
- __func__, step->step_id.job_id, cpid);
+ __func__, step->step_id.job_id, ns_pid);
rc = SLURM_ERROR;
goto exit1;
}
@@ -952,8 +947,7 @@
if (!ns_l_enabled[i].enabled)
continue;
rc = setns(ns_l_enabled[i].fd, 0);
- close(ns_l_enabled[i].fd);
- ns_l_enabled[i].fd = -1;
+ fd_close(&ns_l_enabled[i].fd);
if (rc) {
error("%s: setns failed for %s: %m",
__func__, ns_l_enabled[i].path);
@@ -1063,6 +1057,17 @@
if (plugin_disabled)
return SLURM_SUCCESS;
+ if (ns_pid > 0) {
+ int wstatus;
+ /*
+ * The namespace process may have been signaled already, but
+ * kill it to be sure.
+ */
+ kill(ns_pid, SIGKILL);
+ waitpid(ns_pid, &wstatus, 0);
+ ns_pid = -1;
+ }
+
return _delete_ns(step_id->job_id);
}
@@ -1167,7 +1172,6 @@
rc = SLURM_SUCCESS;
}
end:
- if (fd >= 0)
- close(fd);
+ fd_close(&fd);
return rc;
}
diff --git a/src/plugins/select/cons_tres/job_test.c b/src/plugins/select/cons_tres/job_test.c
index d6a1739..b78a752 100644
--- a/src/plugins/select/cons_tres/job_test.c
+++ b/src/plugins/select/cons_tres/job_test.c
@@ -541,6 +541,8 @@
* Do not allocate more jobs to nodes with completing jobs,
* backfill scheduler independently handles completing nodes
*/
+ log_flag(SELECT_TYPE, "Node %s is in COMPLETING state, skip trying to schedule %pJ on this node.",
+ node_ptr->name, job_ptr);
return NULL;
}
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index cae5aca..298d47a 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -3419,9 +3419,6 @@
sizeof(job_record_t *));
job_array_hash_t = xcalloc(hash_table_size,
sizeof(job_record_t *));
- if (xstrcasestr(slurm_conf.sched_params,
- "enable_job_state_cache"))
- setup_job_state_hash(hash_table_size);
} else if (hash_table_size < (slurm_conf.max_job_cnt / 2)) {
/* If the MaxJobCount grows by too much, the hash table will
* be ineffective without rebuilding. We don't presently bother
@@ -7969,6 +7966,7 @@
/* Add a temporary job_ptr for node_features_g_job_valid */
job_ptr = xmalloc(sizeof(job_record_t));
+ job_ptr->magic = JOB_MAGIC;
job_ptr->details = xmalloc(sizeof(job_details_t));
/* Point, don't dup, so don't free */
job_ptr->details->features = job_desc->features;
@@ -8072,6 +8070,7 @@
FREE_NULL_LIST(job_ptr->details->feature_list);
FREE_NULL_LIST(job_ptr->details->prefer_list);
xfree(job_ptr->details);
+ job_ptr->magic = ~JOB_MAGIC;
xfree(job_ptr);
return rc;
diff --git a/src/slurmctld/job_state.c b/src/slurmctld/job_state.c
index 3520495..242ec52 100644
--- a/src/slurmctld/job_state.c
+++ b/src/slurmctld/job_state.c
@@ -33,97 +33,11 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
-#define _GNU_SOURCE
-#include <pthread.h>
-
#include "src/common/macros.h"
-#include "src/common/xahash.h"
-#include "src/common/xstring.h"
#include "src/slurmctld/locks.h"
#include "src/slurmctld/slurmctld.h"
-/* Macro to only include the code inside of ONLY_DEBUG if !NDEBUG */
-#ifndef NDEBUG
-#define ONLY_DEBUG(...) __VA_ARGS__
-#else
-#define ONLY_DEBUG(...)
-#endif
-
-/*
- * Favor writer lock acquisition to avoid delaying the scheduling thread
- * when under heavy client load. Clients can be safely delayed, job launch
- * is most important.
- */
-#ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
-#define CACHE_LOCK_INIT PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
-#else
-#define CACHE_LOCK_INIT PTHREAD_RWLOCK_INITIALIZER
-#endif
-
-#define JOB_STATE_MIMIC_RECORD(js) \
- &(const job_record_t) \
- { \
- .magic = JOB_MAGIC, .job_id = js->job_id, \
- .array_job_id = js->array_job_id, \
- .array_task_id = js->array_task_id, \
- .het_job_id = js->het_job_id, .job_state = js->job_state, \
- .array_recs = \
- (!js->task_id_bitmap ? \
- NULL : \
- &(job_array_struct_t){ \
- .task_cnt = \
- bit_size(js->task_id_bitmap), \
- .task_id_bitmap = js->task_id_bitmap, \
- }), \
- }
-
-#define ARRAY_JOB_STATE_MIMIC_RECORD(ajs) \
- &(const job_record_t) \
- { \
- .magic = JOB_MAGIC, .job_id = ajs->job_id, \
- .array_task_id = NO_VAL, \
- }
-
-#define ARRAY_TASK_STATE_KEY_BYTES sizeof(array_task_state_cached_t)
-#define ARRAY_TASK_STATE_KEY_JOB_ID(find_array_job_id, find_array_task_id) \
- &(array_task_state_cached_t) \
- { \
- ONLY_DEBUG(.magic = MAGIC_ARRAY_TASK_STATE_CACHED, ) \
- .job_id = NO_VAL, /* not used in search */ \
- .array_job_id = find_array_job_id, \
- .array_task_id = find_array_task_id, \
- }
-#define ARRAY_TASK_STATE_KEY_JOB_PTR(job_ptr) \
- &(array_task_state_cached_t) \
- { \
- ONLY_DEBUG(.magic = MAGIC_ARRAY_TASK_STATE_CACHED, ) \
- .job_id = job_ptr->job_id, \
- .array_job_id = job_ptr->array_job_id, \
- .array_task_id = job_ptr->array_task_id, \
- }
-#define ARRAY_TASK_STATE_KEY_SELECTED_STEP(selected_step) \
- &(array_task_state_cached_t) \
- { \
- ONLY_DEBUG(.magic = MAGIC_ARRAY_TASK_STATE_CACHED, ) \
- .job_id = ((selected_step->array_task_id < NO_VAL) ? \
- NO_VAL : \
- selected_step->step_id.job_id), \
- .array_job_id = \
- ((selected_step->array_task_id < NO_VAL) ? \
- selected_step->step_id.job_id : \
- 0), \
- .array_task_id = selected_step->array_task_id, \
- }
-
-#define ARRAY_TASK_STATE_MIMIC_RECORD(ats) \
- &(const job_record_t) \
- { \
- .magic = JOB_MAGIC, .job_id = ats->job_id, \
- .array_job_id = ats->array_job_id, \
- .array_task_id = ats->array_task_id, \
- }
-
#define MAGIC_JOB_STATE_ARGS 0x0a0beeee
typedef struct {
@@ -135,125 +49,8 @@
bool count_only;
} job_state_args_t;
-#define MAGIC_CACHE_TABLE_STATE 0x1a0beffe
-typedef struct {
- ONLY_DEBUG(int magic;) /* MAGIC_CACHE_TABLE_STATE */
- int table_size;
-} cache_table_state_t;
-
-#define MAGIC_JOB_STATE_CACHED 0x1aa0affb
-typedef struct {
- ONLY_DEBUG(int magic;) /* MAGIC_JOB_STATE_CACHED */
- uint32_t job_id;
- uint32_t job_state;
- uint32_t het_job_id;
- uint32_t array_job_id;
- uint32_t array_task_id;
- bitstr_t *task_id_bitmap;
-} job_state_cached_t;
-
-/* maps array job_id & task_id to non-array job_id */
-#define MAGIC_ARRAY_TASK_STATE_CACHED 0xb2a00fcb
-typedef struct {
- ONLY_DEBUG(int magic;) /* MAGIC_ARRAY_TASK_STATE_CACHED */
- uint32_t job_id;
- uint32_t array_job_id;
- uint32_t array_task_id;
-} array_task_state_cached_t;
-
-/* maps array job_id to circular linked list of non-array job_ids */
-#define MAGIC_ARRAY_JOB_STATE_CACHED 0xb21f0fca
-typedef struct {
- ONLY_DEBUG(int magic;) /* MAGIC_ARRAY_JOB_STATE_CACHED */
- uint32_t job_id;
- uint32_t next_job_id; /* next job_id of member of array or 0 */
-} array_job_state_cached_t;
-
-/*
- * Caches job state (outside of job_mgr.c and JOB_LOCK).
- *
- * Hash: job_id
- * entry type: job_state_cached_t
- * Includes all Job ID info required to identify each job.
- * 1 job per entry.
- * state type: cache_table_state_t
- * Maintains table_size for number of jobs reserved in hashtable
- */
-static xahash_table_t *cache_table = NULL;
-/*
- * Caches linked list of Array job's job_id to allow finding every job member of
- * an array job.
- *
- * Hash: job_id
- * entry type: job_state_cached_t
- * Includes all Job ID info required to identify each job.
- * 1 job per entry.
- * state type: cache_table_state_t
- * Maintains table_size for number of jobs reserved in hashtable
- */
-static xahash_table_t *array_job_cache_table = NULL;
-/*
- * Caches mapping of array_job_id & array_task_id to job_id of every member of
- * an array job.
- *
- * Hash: array_job_id & array_task_id
- * entry type: array_task_state_cached_t
- * state type: cache_table_state_t
- * Maintains table_size for number of jobs reserved in hashtable
- */
-static xahash_table_t *array_task_cache_table = NULL;
-static pthread_rwlock_t cache_lock = CACHE_LOCK_INIT;
-
-#ifndef NDEBUG
-
-#define DEF_DEBUG_TIMER DEF_TIMERS
-#define START_DEBUG_TIMER START_TIMER
-#define END_DEBUG_TIMER END_TIMER2(__func__)
-
-#define T(x) { x, XSTRINGIFY(x) }
-static const struct {
- uint32_t flag;
- char *string;
-} job_flags[] = {
- T(JOB_LAUNCH_FAILED),
- T(JOB_REQUEUE),
- T(JOB_REQUEUE_HOLD),
- T(JOB_SPECIAL_EXIT),
- T(JOB_RESIZING),
- T(JOB_CONFIGURING),
- T(JOB_COMPLETING),
- T(JOB_STOPPED),
- T(JOB_RECONFIG_FAIL),
- T(JOB_POWER_UP_NODE),
- T(JOB_REVOKED),
- T(JOB_REQUEUE_FED),
- T(JOB_RESV_DEL_HOLD),
- T(JOB_SIGNALING),
- T(JOB_STAGE_OUT),
- T(JOB_EXPEDITING),
-};
-
-static void _check_job_state(const uint32_t state)
-{
- uint32_t flags;
-
- if (!(slurm_conf.debug_flags & DEBUG_FLAG_TRACE_JOBS))
- return;
-
- flags = (state & JOB_STATE_FLAGS);
-
- xassert((state & JOB_STATE_BASE) < JOB_END);
-
- for (int i = 0; i < ARRAY_SIZE(job_flags); i++)
- if ((flags & job_flags[i].flag) == job_flags[i].flag)
- flags &= ~(job_flags[i].flag);
-
- /* catch any bits that are not known flags */
- xassert(!flags);
-}
-
static void _log_job_state_change(const job_record_t *job_ptr,
- const uint32_t new_state)
+ const uint32_t new_state, const char *caller)
{
char *before_str, *after_str;
@@ -263,57 +60,32 @@
before_str = job_state_string_complete(job_ptr->job_state);
after_str = job_state_string_complete(new_state);
- if (job_ptr->job_state == new_state)
- log_flag(TRACE_JOBS, "%s: [%pJ] no-op change state: %s",
- __func__, job_ptr, before_str);
- else
+ if (job_ptr->job_state == new_state) {
+ if (get_log_level() >= LOG_LEVEL_DEBUG4)
+ log_flag(TRACE_JOBS, "%s: [%pJ] no-op change state: %s",
+ caller, job_ptr, before_str);
+ } else {
log_flag(TRACE_JOBS, "%s: [%pJ] change state: %s -> %s",
- __func__, job_ptr, before_str, after_str);
+ caller, job_ptr, before_str, after_str);
+ }
xfree(before_str);
xfree(after_str);
}
-#else /* NDEBUG */
-
-/*
- * Replace magic/sanity checks with no-ops that reference args to avoid compile
- * warnings.
- */
-
-#define _check_job_state(state) {(void) state;}
-#define _log_job_state_change(job_ptr, new_state) \
- {(void) job_ptr; (void) new_state;}
-#define DEF_DEBUG_TIMER
-#define START_DEBUG_TIMER
-#define END_DEBUG_TIMER
-
-#endif /* NDEBUG */
-
-#define _log_array_job_chain(js, caller, fmt, ...) {}
-#define _check_all_jobs(compare_job_ptrs) {}
-#define _is_debug() (false)
-#define LOG(fmt, ...) log_flag(TRACE_JOBS, "%s: " fmt, __func__, ##__VA_ARGS__)
-#define _check_job_id(job_id_ptr) {(void) job_id_ptr;}
-#define _check_job_magic(js) {(void) js;}
-#define _check_array_job_magic(ajs) {(void) ajs;}
-#define _check_array_job_magic_links(job_id, array_job_id, should_be_linked) \
- {(void) job_id; (void) array_job_id; (void) should_be_linked;}
-#define _check_array_task_magic(ats) {(void) ats;}
-#define _check_state_magic(state) {(void) state;}
-
-extern void job_state_set(job_record_t *job_ptr, uint32_t state)
+extern void slurm_job_state_set(job_record_t *job_ptr, uint32_t state,
+ const char *caller)
{
xassert(verify_lock(JOB_LOCK, WRITE_LOCK));
- _check_job_state(state);
- _log_job_state_change(job_ptr, state);
+ _log_job_state_change(job_ptr, state, caller);
- on_job_state_change(job_ptr, state);
+ slurm_on_job_state_change(job_ptr, state, caller);
job_ptr->job_state = state;
}
-extern void job_state_set_flag(job_record_t *job_ptr, uint32_t flag)
+extern void slurm_job_state_set_flag(job_record_t *job_ptr, uint32_t flag,
+ const char *caller)
{
uint32_t job_state;
@@ -322,15 +94,15 @@
xassert(flag & JOB_STATE_FLAGS);
job_state = job_ptr->job_state | flag;
- _check_job_state(job_state);
- _log_job_state_change(job_ptr, job_state);
+ _log_job_state_change(job_ptr, job_state, caller);
- on_job_state_change(job_ptr, job_state);
+ slurm_on_job_state_change(job_ptr, job_state, caller);
job_ptr->job_state = job_state;
}
-extern void job_state_unset_flag(job_record_t *job_ptr, uint32_t flag)
+extern void slurm_job_state_unset_flag(job_record_t *job_ptr, uint32_t flag,
+ const char *caller)
{
uint32_t job_state;
@@ -339,10 +111,9 @@
xassert(flag & JOB_STATE_FLAGS);
job_state = job_ptr->job_state & ~flag;
- _check_job_state(job_state);
- _log_job_state_change(job_ptr, job_state);
+ _log_job_state_change(job_ptr, job_state, caller);
- on_job_state_change(job_ptr, job_state);
+ slurm_on_job_state_change(job_ptr, job_state, caller);
job_ptr->job_state = job_state;
}
@@ -424,228 +195,6 @@
}
}
-static int _add_cache_job(job_state_args_t *args, const job_state_cached_t *js)
-{
- job_state_response_job_t *rjob;
-
- xassert(args->magic == MAGIC_JOB_STATE_ARGS);
- xassert(js->magic == MAGIC_JOB_STATE_CACHED);
-
- rjob = _append_job_state(args);
-
- if (args->count_only)
- return SLURM_SUCCESS;
-
- if (!rjob) {
- LOG("[%pJ] packing at %d/%d failed",
- JOB_STATE_MIMIC_RECORD(js), args->count, args->jobs_count);
- return ERANGE;
- }
-
- if (_is_debug()) {
- char *bitstr =
- (js->task_id_bitmap ? bit_fmt_full(js->task_id_bitmap) :
- NULL);
- const size_t bits =
- (js->task_id_bitmap ? bit_size(js->task_id_bitmap) : 0);
-
- LOG("[%pJ] packing JobId=%u ArrayJobId=%u ArrayTaskId=%u array_task_id_bitmap[%zu]=%s HetJobId=%u state:%s",
- JOB_STATE_MIMIC_RECORD(js), js->job_id, js->array_job_id,
- js->array_task_id, bits, (bitstr ? bitstr : ""),
- js->het_job_id, job_state_string(js->job_state));
-
- (void) bits;
- xfree(bitstr);
- }
-
- rjob->job_id = js->job_id;
- rjob->array_job_id = js->array_job_id;
- rjob->array_task_id = js->array_task_id;
- if (js->task_id_bitmap)
- rjob->array_task_id_bitmap = bit_copy(js->task_id_bitmap);
- rjob->het_job_id = js->het_job_id;
- rjob->state = js->job_state;
-
- return SLURM_SUCCESS;
-}
-
-static xahash_foreach_control_t _foreach_cache_job(void *entry, void *state_ptr,
- void *arg)
-{
- const job_state_cached_t *js = entry;
- ONLY_DEBUG(cache_table_state_t *state = state_ptr;)
- job_state_args_t *args = arg;
-
- xassert(!state || (state->magic == MAGIC_CACHE_TABLE_STATE));
- xassert(args->magic == MAGIC_JOB_STATE_ARGS);
- xassert(js->magic == MAGIC_JOB_STATE_CACHED);
-
- if (_add_cache_job(args, js))
- return XAHASH_FOREACH_FAIL;
-
- return XAHASH_FOREACH_CONT;
-}
-
-static void _find_job_state_cached_by_job_id(job_state_args_t *args,
- uint32_t job_id, bool resolve)
-{
- const job_state_cached_t *js;
-
- if (!(js = xahash_find_entry(cache_table, &job_id, sizeof(job_id)))) {
- LOG("[JobId=%u] Unable to resolve job", job_id);
- return;
- }
-
- _check_job_magic(js);
-
- LOG("[%pJ] Resolved from JobId=%u", JOB_STATE_MIMIC_RECORD(js), job_id);
-
- if (_add_cache_job(args, js))
- return;
-
- if (!resolve) {
- LOG("[%pJ] Not fully resolving job", JOB_STATE_MIMIC_RECORD(js));
- return;
- }
-
- if ((js->array_job_id > 0) && (js->array_job_id == js->job_id)) {
- array_job_state_cached_t *ajs;
-
- ajs = xahash_find_entry(array_job_cache_table,
- &js->array_job_id,
- sizeof(js->array_job_id));
-
- _check_array_job_magic(ajs);
-
- _log_array_job_chain(js, __func__,
- "Resolved %pJ with next: JobId=%u",
- ARRAY_JOB_STATE_MIMIC_RECORD(ajs),
- ajs->next_job_id);
-
- while (ajs->next_job_id != js->array_job_id) {
- const job_state_cached_t *next;
-
- if ((next = xahash_find_entry(
- cache_table, &ajs->next_job_id,
- sizeof(ajs->next_job_id)))) {
- LOG("[%pJ] Resolved to %pJ via %pJ",
- JOB_STATE_MIMIC_RECORD(js),
- JOB_STATE_MIMIC_RECORD(next),
- ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
- if (_add_cache_job(args, next))
- break;
- } else {
- fatal_abort("Unable to resolve next_job_id");
- }
-
- ajs = xahash_find_entry(array_job_cache_table,
- &ajs->next_job_id,
- sizeof(ajs->next_job_id));
- }
-
- xassert(ajs->next_job_id == js->array_job_id);
- } else if (js->het_job_id == js->job_id) {
- for (uint32_t i = 1; i < MAX_JOB_ID; i++) {
- const job_state_cached_t *hjs;
- uint32_t het_job_id = js->het_job_id + i;
-
- if ((hjs = xahash_find_entry(cache_table, &het_job_id,
- sizeof(het_job_id))) &&
- (hjs->het_job_id == js->het_job_id)) {
- LOG("[%pJ] Resolved HetJobId=%u+%u to %pJ",
- JOB_STATE_MIMIC_RECORD(js), job_id, i,
- JOB_STATE_MIMIC_RECORD(hjs));
- if (_add_cache_job(args, hjs))
- break;
- } else {
- /*
- * Next job not found or not part of the HetJob
- */
- break;
- }
- }
- } else {
- LOG("[%pJ] Nothing else to resolve",
- JOB_STATE_MIMIC_RECORD(js));
- }
-}
-
-static void _find_job_state_cached_by_id(job_state_args_t *args,
- const slurm_selected_step_t *filter)
-{
- char *filter_str = NULL;
-
- if (_is_debug())
- (void) fmt_job_id_string((slurm_selected_step_t *) filter,
- &filter_str);
-
- if (!filter->step_id.job_id) {
- /* 0 is never a valid job so just return now */
- goto cleanup;
- } else if (filter->step_id.job_id == NO_VAL) {
- /* walk all jobs */
- (void) xahash_foreach_entry(cache_table, _foreach_cache_job,
- args);
- goto cleanup;
- }
-
- xassert(!((filter->array_task_id != NO_VAL) &&
- (filter->het_job_offset != NO_VAL)));
-
- if (filter->array_task_id != NO_VAL) {
- const array_task_state_cached_t *ats;
-
- ats = xahash_find_entry(
- array_task_cache_table,
- ARRAY_TASK_STATE_KEY_SELECTED_STEP(filter),
- ARRAY_TASK_STATE_KEY_BYTES);
-
- if (ats) {
- slurm_selected_step_t aj_filter =
- SLURM_SELECTED_STEP_INITIALIZER;
- aj_filter.step_id.job_id = ats->job_id;
-
- _check_array_task_magic(ats);
- LOG("[%pJ] Resolved from %s",
- ARRAY_TASK_STATE_MIMIC_RECORD(ats), filter_str);
-
- _find_job_state_cached_by_id(args, &aj_filter);
- } else {
- LOG("[%s] Unable to resolve job", filter_str);
- }
- } else if (filter->het_job_offset != NO_VAL) {
- _find_job_state_cached_by_job_id(args,
- (filter->step_id.job_id +
- filter->het_job_offset),
- false);
- } else {
- _find_job_state_cached_by_job_id(args, filter->step_id.job_id,
- true);
- }
-
-cleanup:
- xfree(filter_str);
-}
-
-static void _dump_job_state_cached(job_state_args_t *args,
- const uint32_t filter_jobs_count,
- const slurm_selected_step_t *filter_jobs_ptr)
-{
- xassert(args->magic == MAGIC_JOB_STATE_ARGS);
-
- slurm_rwlock_rdlock(&cache_lock);
-
- if (!filter_jobs_count) {
- (void) xahash_foreach_entry(cache_table, _foreach_cache_job,
- args);
- } else {
- for (int i = 0; !args->rc && (i < filter_jobs_count); i++)
- _find_job_state_cached_by_id(args, &filter_jobs_ptr[i]);
- }
-
- slurm_rwlock_unlock(&cache_lock);
-}
-
extern int dump_job_state(const uint32_t filter_jobs_count,
const slurm_selected_step_t *filter_jobs_ptr,
uint32_t *jobs_count_ptr,
@@ -656,27 +205,14 @@
.magic = MAGIC_JOB_STATE_ARGS,
.count_only = true,
};
- /*
- * In order to avoid the time cost, we are not taking the cache_lock
- * to check if the cache_table pointer is !NULL as the cost of hitting
- * an invalid cached state here is very minor.
- */
- bool use_cache = cache_table;
- if (!use_cache)
- lock_slurmctld(job_read_lock);
+ lock_slurmctld(job_read_lock);
/*
* Loop once to grab the job count and then allocate the job array and
* then populate the array.
*/
-
- if (use_cache)
- _dump_job_state_cached(&args, filter_jobs_count,
- filter_jobs_ptr);
- else
- _dump_job_state_locked(&args, filter_jobs_count,
- filter_jobs_ptr);
+ _dump_job_state_locked(&args, filter_jobs_count, filter_jobs_ptr);
if (args.count > 0) {
if (!try_xrecalloc(args.jobs, args.count, sizeof(*args.jobs))) {
@@ -690,737 +226,20 @@
args.count_only = false;
args.count = 0;
- if (use_cache)
- _dump_job_state_cached(&args, filter_jobs_count,
- filter_jobs_ptr);
- else
- _dump_job_state_locked(&args, filter_jobs_count,
- filter_jobs_ptr);
+ _dump_job_state_locked(&args, filter_jobs_count,
+ filter_jobs_ptr);
}
*jobs_pptr = args.jobs;
*jobs_count_ptr = args.jobs_count;
cleanup:
- if (!use_cache)
- unlock_slurmctld(job_read_lock);
+ unlock_slurmctld(job_read_lock);
return args.rc;
}
-static void _sync_job_task_id_bitmap(const job_record_t *job_ptr,
- job_state_cached_t *js)
+extern void slurm_on_job_state_change(job_record_t *job_ptr, uint32_t new_state,
+ const char *caller)
{
- if (!job_ptr->array_recs) {
- if (job_ptr->array_task_id == NO_VAL) {
- /* before array job is split */
- LOG("[%pJ] ignoring array job without array_recs",
- JOB_STATE_MIMIC_RECORD(js));
- return;
- }
-
- /* conversion from meta job to normal job */
- xassert(!js->task_id_bitmap || (js->array_task_id == NO_VAL));
-
- if (_is_debug() && js->task_id_bitmap) {
- char *before = bit_fmt_full(js->task_id_bitmap);
- LOG("[%pJ] releasing array task_id_bitmap[%lu]: %s",
- JOB_STATE_MIMIC_RECORD(js),
- bit_size(js->task_id_bitmap), before);
- xfree(before);
- }
-
- /* bitmap removed by state change or was never there */
- FREE_NULL_BITMAP(js->task_id_bitmap);
-
- return;
- }
-
- if (!job_ptr->array_recs->task_id_bitmap) {
- uint32_t task_cnt = job_ptr->array_recs->pend_run_tasks;
-
- if ((job_ptr->job_state != JOB_PENDING) || !task_cnt) {
- if (_is_debug() &&
- (job_ptr->job_state != JOB_PENDING) &&
- js->task_id_bitmap) {
- char *before = bit_fmt_full(js->task_id_bitmap);
- LOG("[%pJ] job no longer pending: releasing array task_id_bitmap[%lu]: %s",
- JOB_STATE_MIMIC_RECORD(js),
- bit_size(js->task_id_bitmap), before);
- xfree(before);
- }
-
- if (_is_debug() && !task_cnt && js->task_id_bitmap)
- LOG("[%pJ] pending array job without pending task count",
- JOB_STATE_MIMIC_RECORD(js));
-
- /* bitmap removed by state change or was never there */
- FREE_NULL_BITMAP(js->task_id_bitmap);
-
- return;
- }
-
- if (js->task_id_bitmap &&
- (bit_size(js->task_id_bitmap) != task_cnt)) {
- LOG("[%pJ] array job task_id_bitmap changed from %lu to %u",
- JOB_STATE_MIMIC_RECORD(js),
- (js->task_id_bitmap ? bit_size(js->task_id_bitmap) :
- 0),
- task_cnt);
- FREE_NULL_BITMAP(js->task_id_bitmap);
- }
-
- if (!js->task_id_bitmap) {
- LOG("[%pJ] mimicking array without task_id_bitmap with new bitmap[%u]",
- JOB_STATE_MIMIC_RECORD(js), task_cnt);
- js->task_id_bitmap = bit_alloc(task_cnt);
- }
-
- xassert(js->task_id_bitmap);
- xassert(bit_size(js->task_id_bitmap) == task_cnt);
-
- bit_set_all(js->task_id_bitmap);
-
- if (_is_debug()) {
- char *map = bit_fmt_full(js->task_id_bitmap);
- LOG("[%pJ] mimicking array without bitmap as task_id_bitmap[%lu]: %s",
- JOB_STATE_MIMIC_RECORD(js),
- bit_size(js->task_id_bitmap), map);
- xfree(map);
- }
-
- return;
- }
-
- if (js->task_id_bitmap &&
- (bit_size(js->task_id_bitmap) ==
- bit_size(job_ptr->array_recs->task_id_bitmap))) {
- /* resync all bits */
-
- if (_is_debug()) {
- char *before = bit_fmt_full(js->task_id_bitmap);
- char *after = bit_fmt_full(job_ptr->array_recs
- ->task_id_bitmap);
- LOG("[%pJ] updating array task_id_bitmap[%lu]: %s -> %s",
- JOB_STATE_MIMIC_RECORD(js),
- bit_size(job_ptr->array_recs->task_id_bitmap),
- before, after);
- xfree(before);
- xfree(after);
- }
-
- bit_copybits(js->task_id_bitmap,
- job_ptr->array_recs->task_id_bitmap);
- } else {
- /* new bitmap or bit count changed */
-
- if (_is_debug()) {
- char *before =
- (js->task_id_bitmap ?
- bit_fmt_full(js->task_id_bitmap) :
- NULL);
- char *after =
- (job_ptr->array_recs->task_id_bitmap ?
- bit_fmt_full(
- job_ptr->array_recs
- ->task_id_bitmap) :
- NULL);
- LOG("[%pJ] new array task_id_bitmap[%lu]: %s -> %s",
- JOB_STATE_MIMIC_RECORD(js),
- (job_ptr->array_recs->task_id_bitmap ?
- bit_size(job_ptr->array_recs
- ->task_id_bitmap) :
- 0),
- (before ? before : "∅"), (after ? after : "∅"));
- xfree(before);
- xfree(after);
- }
-
- FREE_NULL_BITMAP(js->task_id_bitmap);
- js->task_id_bitmap =
- bit_copy(job_ptr->array_recs->task_id_bitmap);
- }
-}
-
-static void _link_array_job(const job_record_t *job_ptr, job_state_cached_t *js)
-{
- array_task_state_cached_t *ats;
- array_job_state_cached_t *ajs, *meta;
- const uint32_t job_id = job_ptr->job_id;
-
- xassert(!job_ptr->het_job_id);
- xassert(job_ptr->array_job_id > 0);
- xassert(js->array_job_id > 0);
- xassert(js->array_job_id == job_ptr->array_job_id);
- xassert(js->array_task_id == job_ptr->array_task_id);
-
- ats = xahash_insert_entry(array_task_cache_table,
- ARRAY_TASK_STATE_KEY_JOB_PTR(job_ptr),
- ARRAY_TASK_STATE_KEY_BYTES);
- _check_array_task_magic(ats);
- xassert(ats->array_job_id == job_ptr->array_job_id);
- xassert(ats->array_task_id == job_ptr->array_task_id);
-
- ajs = xahash_insert_entry(array_job_cache_table, &job_id,
- sizeof(job_id));
- _check_array_job_magic(ajs);
-
- if (ajs->next_job_id != job_id) {
- _log_array_job_chain(
- js, __func__,
- "skipping already linked array jobs next:JobId=%u",
- ajs->next_job_id);
- _check_array_job_magic_links(job_ptr->job_id,
- job_ptr->array_job_id, true);
- return;
- }
-
- /* Newly inserted jobs only link to themselves */
- xassert(ajs->next_job_id == job_id);
- xassert(ajs->job_id == job_id);
-
- /* need to add this job into linked list of jobs for array */
- meta = xahash_insert_entry(array_job_cache_table,
- &job_ptr->array_job_id,
- sizeof(job_ptr->array_job_id));
- _check_array_job_magic(meta);
-
- if (job_ptr->job_id == job_ptr->array_job_id) {
- /*
- * Can't link meta job to itself, so find if another job already
- * linked to meta and created a stub for the meta job.
- */
-
- if (meta->next_job_id != meta->job_id) {
- _log_array_job_chain(js, __func__,
- "skipping already linked array meta job");
- return;
- } else {
- _log_array_job_chain(js, __func__,
- "skipping linking singular array meta job");
- return;
- }
- }
-
- xassert(meta != ajs);
-
- ajs->next_job_id = meta->next_job_id;
- meta->next_job_id = job_id;
-
- _check_array_job_magic(ajs);
- _check_array_job_magic(meta);
- _log_array_job_chain(js, __func__, "linked to %pJ",
- ARRAY_JOB_STATE_MIMIC_RECORD(meta));
- _check_array_job_magic_links(job_ptr->job_id, job_ptr->array_job_id,
- true);
-}
-
-/* remove ajs from linked list of array jobs */
-static void _unlink_array_job(const job_record_t *job_ptr,
- job_state_cached_t *js,
- array_job_state_cached_t *ajs)
-{
- uint32_t job_id = js->job_id;
- uint32_t array_job_id = js->array_job_id;
- array_job_state_cached_t *next = NULL;
-
- if (ajs->next_job_id == job_id) {
- _log_array_job_chain(xahash_find_entry(cache_table, &job_id,
- sizeof(job_id)),
- __func__,
- "removing singular chain for %pJ",
- job_ptr);
-
- if (!xahash_free_entry(array_job_cache_table, &job_id,
- sizeof(job_id)))
- fatal_abort("Unable to remove %pJ after just finding it",
- ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
- return;
- } else if (js->array_job_id == js->job_id) {
- _log_array_job_chain(js, __func__,
- "skipping removal of meta which would orphan JobId=%u",
- ajs->next_job_id);
- return;
- }
-
- next = ajs;
-
- while (next->next_job_id != job_id) {
- next = xahash_find_entry(array_job_cache_table,
- &next->next_job_id,
- sizeof(next->next_job_id));
- _check_array_job_magic(next);
- }
-
- xassert(next->next_job_id == job_id);
-
- _log_array_job_chain(js, __func__, "removing from link chain");
-
- next->next_job_id = ajs->next_job_id;
- ajs->next_job_id = job_id;
-
- _log_array_job_chain(js, __func__,
- "array job chain removed for %pJ removal",
- job_ptr);
-
- if (array_job_id > 0) {
- _log_array_job_chain(xahash_find_entry(cache_table,
- &array_job_id,
- sizeof(array_job_id)),
- __func__,
- "removed %pJ from meta link chain",
- job_ptr);
- _check_array_job_magic_links(job_id, array_job_id, false);
- }
-
- _check_array_job_magic(ajs);
- _check_array_job_magic(next);
-
- if (!xahash_free_entry(array_job_cache_table, &job_id, sizeof(job_id)))
- fatal_abort("Unable to remove %pJ after just finding it",
- ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
-
- /* check for meta that only exists for this job */
- if (array_job_id && (next->job_id == next->next_job_id)) {
- job_state_cached_t *meta_js =
- xahash_find_entry(cache_table, &array_job_id,
- sizeof(array_job_id));
-
- /* should only be the meta in chain left */
- xassert(next->job_id == array_job_id);
- xassert(next->next_job_id == array_job_id);
-
- if (meta_js) {
- _log_array_job_chain(meta_js, __func__,
- "keeping meta job in chain after %pJ removal",
- job_ptr);
- } else {
- /*
- * if there is no JS for the meta job, then this chain
- * is only a placeholder that needs to be removed
- */
-
- _log_array_job_chain(js, __func__,
- "removing meta job placeholder in chain after %pJ removal",
- job_ptr);
-
- /* prune meta job that only exist to avoid orphans */
- if (!xahash_free_entry(array_job_cache_table,
- &array_job_id,
- sizeof(array_job_id)))
- fatal_abort("[JobId=%u] Unable to remove array meta job placeholder link",
- array_job_id);
- }
- }
-}
-
-static void _on_array_job_removal(const job_record_t *job_ptr,
- job_state_cached_t *js)
-{
- array_job_state_cached_t *ajs;
-
- /*
- * Need to use the cached array_task_id and not the potentially changed
- * job_ptr->array_task_id to remove the task cache
- */
-
- xassert(js);
- xassert(js->job_id == job_ptr->job_id);
-
- if ((ajs = xahash_find_entry(array_job_cache_table, &js->job_id,
- sizeof(js->job_id)))) {
- _check_array_job_magic(ajs);
- xassert(ajs->job_id == job_ptr->job_id);
-
- _check_array_job_magic_links(js->job_id, js->array_job_id,
- true);
- _unlink_array_job(job_ptr, js, ajs);
- } else {
- xassert(!js->array_job_id);
- }
-
- if (js->array_job_id > 0) {
- if (!xahash_free_entry(
- array_task_cache_table,
- ARRAY_TASK_STATE_KEY_JOB_ID(js->array_job_id,
- js->array_task_id),
- ARRAY_TASK_STATE_KEY_BYTES))
- fatal_abort("[%pJ] array task cache not found",
- JOB_STATE_MIMIC_RECORD(js));
-
- LOG("[%pJ] array task cache removed for %pJ",
- JOB_STATE_MIMIC_RECORD(js), job_ptr);
- }
-
- if (_is_debug()) {
- array_task_state_cached_t *ats;
-
- ats = xahash_find_entry(array_task_cache_table,
- ARRAY_TASK_STATE_KEY_JOB_ID(
- js->job_id, js->array_task_id),
- ARRAY_TASK_STATE_KEY_BYTES);
- ajs = xahash_find_entry(array_job_cache_table, &job_ptr->job_id,
- sizeof(job_ptr->job_id));
-
- if (ats)
- fatal_abort("found array task when there should not be one: %pJ",
- ARRAY_TASK_STATE_MIMIC_RECORD(ats));
- if (ajs && (js->job_id != js->array_job_id))
- fatal_abort("found array job link when there should not be one: %pJ",
- ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
- }
-}
-
-static void _on_array_job_change(const job_record_t *job_ptr,
- job_state_cached_t *js)
-{
- xassert(!js->het_job_id);
-
- /*
- * Array IDs can change during meta array job's life.
- * This could cause any of the existing array array_job_id/task_id ->
- * job_id or array_job chain to be incorrect and they need to be
- * rebuilt.
- */
-
- if ((js->array_task_id != job_ptr->array_task_id) ||
- (js->array_job_id != job_ptr->array_job_id)) {
- /* task id should only ever change from meta to a numeric */
- xassert(js->array_task_id == NO_VAL);
- xassert(!js->het_job_id);
- xassert(!job_ptr->het_job_id);
- xassert(job_ptr->array_job_id > 0);
- LOG("[%pJ] changed array_task_id=%u->%u array_job_id=%u->%u",
- JOB_STATE_MIMIC_RECORD(js), js->array_task_id,
- job_ptr->array_task_id, js->array_job_id,
- job_ptr->array_job_id);
-
- /* Remove task cache but leave the meta job link intact */
- if (js->array_job_id &&
- !xahash_free_entry(
- array_task_cache_table,
- ARRAY_TASK_STATE_KEY_JOB_ID(js->array_job_id,
- js->array_task_id),
- ARRAY_TASK_STATE_KEY_BYTES))
- fatal_abort("[%pJ] array task cache not found",
- JOB_STATE_MIMIC_RECORD(js));
- }
-
- xassert(js->job_id == job_ptr->job_id);
- js->array_task_id = job_ptr->array_task_id;
- js->array_job_id = job_ptr->array_job_id;
-
- _sync_job_task_id_bitmap(job_ptr, js);
- _link_array_job(job_ptr, js);
-}
-
-extern void on_job_state_change(job_record_t *job_ptr, uint32_t new_state)
-{
- DEF_DEBUG_TIMER;
- job_state_cached_t *js = NULL;
- const uint32_t job_id = job_ptr->job_id;
-
- /*
- * In order to avoid the time cost, we are not taking the cache_lock
- * to check if the cache_table pointer is !NULL as the cost of hitting
- * an invalid cached state here is very minor.
- */
- if (!cache_table)
- return;
-
- if (!job_id)
- return;
-
- START_DEBUG_TIMER;
-
- xassert(job_ptr->magic == JOB_MAGIC);
-
- slurm_rwlock_wrlock(&cache_lock);
-
- _check_all_jobs(false);
-
- if (new_state == NO_VAL) {
- js = xahash_find_entry(cache_table, &job_id, sizeof(job_id));
-
- if (js && (js->array_job_id > 0))
- _on_array_job_removal(job_ptr, js);
-
- if (xahash_free_entry(cache_table, &job_id, sizeof(job_id)))
- LOG("[%pJ] job state cache removed", job_ptr);
- else
- LOG("[%pJ] job state cache not found", job_ptr);
-
- _check_all_jobs(false);
-
- slurm_rwlock_unlock(&cache_lock);
- END_DEBUG_TIMER;
- return;
- }
-
- js = xahash_insert_entry(cache_table, &job_id, sizeof(job_id));
- xassert(js->magic == MAGIC_JOB_STATE_CACHED);
-
- if (_is_debug() && (js->job_state != new_state)) {
- char *before = job_state_string_complete(js->job_state);
- char *after = job_state_string_complete(job_ptr->job_state);
-
- LOG("[%pJ] changed state: %s -> %s",
- JOB_STATE_MIMIC_RECORD(js), before, after);
-
- xfree(before);
- xfree(after);
- }
-
- js->job_state = new_state;
-
- if (job_ptr->array_job_id || js->array_job_id)
- _on_array_job_change(job_ptr, js);
-
- /*
- * A het job is added to the job state cache after each component is
- * created. Because this cache exists outside of the job read/write
- * locks, that means a job state cache query can happen while a het
- * job is being created, and het job id may not be set for each
- * component yet. In that case, check that the het job state cache has
- * not been initialized yet.
- */
-
- if (_is_debug() && (js->het_job_id != job_ptr->het_job_id)) {
- xassert(!js->het_job_id);
- xassert(js->array_task_id == NO_VAL);
- xassert(!js->array_job_id);
- xassert(job_ptr->array_task_id == NO_VAL);
- xassert(!job_ptr->array_job_id);
- LOG("[%pJ] changed het_job_id=%u->%u",
- JOB_STATE_MIMIC_RECORD(js), js->het_job_id,
- job_ptr->het_job_id);
- }
-
- js->het_job_id = job_ptr->het_job_id;
-
- _check_all_jobs(false);
-
- slurm_rwlock_unlock(&cache_lock);
- END_DEBUG_TIMER;
-}
-
-static xahash_hash_t _hash(const void *key, const size_t key_bytes,
- void *state_ptr)
-{
- cache_table_state_t *state = state_ptr;
- const uint32_t *job_id_ptr = key;
-
- _check_state_magic(state);
- _check_job_id(job_id_ptr);
- xassert(sizeof(*job_id_ptr) == key_bytes);
-
- return *job_id_ptr % state->table_size;
-}
-
-static bool _match(void *entry, const void *key, const size_t key_bytes,
- void *state_ptr)
-{
- job_state_cached_t *js = entry;
- cache_table_state_t *state = state_ptr;
- const uint32_t *job_id_ptr = key;
-
- _check_job_magic(js);
- _check_state_magic(state);
- _check_job_id(job_id_ptr);
- xassert(sizeof(*job_id_ptr) == key_bytes);
-
- return (js->job_id == *job_id_ptr);
-}
-
-static void _on_insert(void *entry, const void *key, const size_t key_bytes,
- void *state_ptr)
-{
- job_state_cached_t *js = entry;
- const uint32_t *job_id_ptr = key;
- cache_table_state_t *state = state_ptr;
-
- _check_state_magic(state);
- _check_job_id(job_id_ptr);
- xassert(sizeof(*job_id_ptr) == key_bytes);
-
- *js = (job_state_cached_t) {
- ONLY_DEBUG(.magic = MAGIC_JOB_STATE_CACHED,)
- .job_id = *job_id_ptr,
- .job_state = NO_VAL,
- .array_task_id = NO_VAL,
- };
-
- LOG("%pJ inserted", JOB_STATE_MIMIC_RECORD(js));
- _check_job_magic(js);
-}
-
-static void _on_free(void *ptr, void *state_ptr)
-{
- job_state_cached_t *js = ptr;
- cache_table_state_t *state = state_ptr;
-
- _check_job_magic(js);
- _check_state_magic(state);
-
- LOG("%pJ releasing", JOB_STATE_MIMIC_RECORD(js));
-
- FREE_NULL_BITMAP(js->task_id_bitmap);
- ONLY_DEBUG(memset(js, 0, sizeof(*js)));
- ONLY_DEBUG(js->magic = ~MAGIC_JOB_STATE_CACHED);
-}
-
-static bool _array_job_match(void *entry, const void *key,
- const size_t key_bytes, void *state_ptr)
-{
- array_job_state_cached_t *ajs = entry;
- cache_table_state_t *state = state_ptr;
- const uint32_t *job_id_ptr = key;
-
- _check_state_magic(state);
- _check_array_job_magic(ajs);
- _check_job_id(job_id_ptr);
- xassert(sizeof(*job_id_ptr) == key_bytes);
-
- return (ajs->job_id == *job_id_ptr);
-}
-
-static void _array_job_on_insert(void *entry, const void *key,
- const size_t key_bytes, void *state_ptr)
-{
- array_job_state_cached_t *ajs = entry;
- cache_table_state_t *state = state_ptr;
- const uint32_t *job_id_ptr = key;
-
- _check_state_magic(state);
- _check_job_id(job_id_ptr);
- xassert(sizeof(*job_id_ptr) == key_bytes);
-
- *ajs = (array_job_state_cached_t) {
- ONLY_DEBUG(.magic = MAGIC_ARRAY_JOB_STATE_CACHED,)
- .job_id = *job_id_ptr,
- .next_job_id = *job_id_ptr,
- };
-
- LOG("%pJ inserted", ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
- _check_array_job_magic(ajs);
-}
-
-static void _array_job_on_free(void *ptr, void *state_ptr)
-{
- array_job_state_cached_t *ajs = ptr;
- cache_table_state_t *state = state_ptr;
-
- _check_state_magic(state);
- _check_array_job_magic(ajs);
-
- LOG("%pJ released", ARRAY_JOB_STATE_MIMIC_RECORD(ajs));
- ONLY_DEBUG(memset(ajs, 0, sizeof(*ajs)));
- ONLY_DEBUG(ajs->magic = ~MAGIC_ARRAY_JOB_STATE_CACHED);
-}
-
-static xahash_hash_t _array_task_hash(const void *key, const size_t key_bytes,
- void *state_ptr)
-{
- uint64_t seed;
- cache_table_state_t *state = state_ptr;
- const array_task_state_cached_t *ats_key = key;
-
- _check_state_magic(state);
- _check_array_task_magic(ats_key);
-
- seed = ((uint64_t) ats_key->array_job_id) << 32;
- seed |= ats_key->array_task_id;
-
- return seed % state->table_size;
-}
-
-static bool _array_task_match(void *entry, const void *key,
- const size_t key_bytes, void *state_ptr)
-{
- array_task_state_cached_t *ats = entry;
- cache_table_state_t *state = state_ptr;
- const array_task_state_cached_t *ats_key = key;
-
- _check_state_magic(state);
- _check_array_task_magic(ats);
- _check_array_task_magic(ats_key);
- xassert(sizeof(*ats_key) == key_bytes);
-
- /* treat NO_VAL and INFINITE as * for arrays */
- if ((ats_key->array_task_id < NO_VAL) &&
- (ats->array_task_id != ats_key->array_task_id))
- return false;
-
- return (ats->array_job_id == ats_key->array_job_id);
-}
-
-static void _array_task_on_insert(void *entry, const void *key,
- const size_t key_bytes, void *state_ptr)
-{
- array_task_state_cached_t *ats = entry;
- const array_task_state_cached_t *ats_key = key;
- cache_table_state_t *state = state_ptr;
-
- _check_state_magic(state);
- _check_array_task_magic(ats_key);
- xassert(sizeof(*ats_key) == key_bytes);
-
- *ats = *ats_key;
-
- LOG("%pJ inserted", ARRAY_TASK_STATE_MIMIC_RECORD(ats));
- _check_array_task_magic(ats);
-}
-
-static void _array_task_on_free(void *ptr, void *state_ptr)
-{
- array_task_state_cached_t *ats = ptr;
- cache_table_state_t *state = state_ptr;
-
- _check_state_magic(state);
- _check_array_task_magic(ats);
-
- LOG("%pJ released", ARRAY_TASK_STATE_MIMIC_RECORD(ats));
- ONLY_DEBUG(memset(ats, 0, sizeof(*ats)));
- ONLY_DEBUG(ats->magic = ~MAGIC_ARRAY_TASK_STATE_CACHED);
-}
-
-extern void setup_job_state_hash(int new_hash_table_size)
-{
- const cache_table_state_t nstate = (cache_table_state_t) {
- ONLY_DEBUG(.magic = MAGIC_CACHE_TABLE_STATE,)
- .table_size = new_hash_table_size,
- };
-
- LOG("Job state cache active with %d jobs in hash tables",
- new_hash_table_size);
-
- slurm_rwlock_wrlock(&cache_lock);
-
- xassert(!cache_table);
- cache_table = xahash_new_table(_hash, _match, _on_insert, _on_free,
- sizeof(cache_table_state_t),
- sizeof(job_state_cached_t),
- new_hash_table_size);
- *((cache_table_state_t *) xahash_get_state_ptr(cache_table)) = nstate;
-
- xassert(!array_job_cache_table);
- array_job_cache_table =
- xahash_new_table(_hash, _array_job_match,
- _array_job_on_insert, _array_job_on_free,
- sizeof(cache_table_state_t),
- sizeof(array_job_state_cached_t),
- new_hash_table_size);
- *((cache_table_state_t *) xahash_get_state_ptr(array_job_cache_table)) =
- nstate;
-
- xassert(!array_task_cache_table);
- array_task_cache_table =
- xahash_new_table(_array_task_hash, _array_task_match,
- _array_task_on_insert, _array_task_on_free,
- sizeof(cache_table_state_t),
- sizeof(array_task_state_cached_t),
- new_hash_table_size);
- *((cache_table_state_t *)
- xahash_get_state_ptr(array_task_cache_table)) = nstate;
-
- slurm_rwlock_unlock(&cache_lock);
+ _log_job_state_change(job_ptr, new_state, caller);
}
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index b36586b..c0ba8ed 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -552,38 +552,58 @@
*/
extern int drain_nodes ( char *nodes, char *reason, uint32_t reason_uid );
+/* Call job_state_set() instead */
+extern void slurm_job_state_set(job_record_t *job_ptr, uint32_t state,
+ const char *caller);
+
/*
* Set job state
* IN job_ptr - Job to update
* IN state - state from enum job_states
*/
-extern void job_state_set(job_record_t *job_ptr, uint32_t state);
+#define job_state_set(job_ptr, state) \
+ slurm_job_state_set((job_ptr), (state), __func__)
+
+/* Call job_state_set_flag() instead */
+extern void slurm_job_state_set_flag(job_record_t *job_ptr, uint32_t flag,
+ const char *caller);
/*
* Set job state flag
* IN job_ptr - Job to update
* IN flag - flag to set (from JOB_* macro)
*/
-extern void job_state_set_flag(job_record_t *job_ptr, uint32_t flag);
+#define job_state_set_flag(job_ptr, flag) \
+ slurm_job_state_set_flag((job_ptr), (flag), __func__)
+
+/* Call job_state_unset_flag() instead */
+extern void slurm_job_state_unset_flag(job_record_t *job_ptr, uint32_t flag,
+ const char *caller);
/*
* Unset job state flag
* IN job_ptr - Job to update
* IN flag - flag to unset (from JOB_* macro)
*/
-extern void job_state_unset_flag(job_record_t *job_ptr, uint32_t flag);
+#define job_state_unset_flag(job_ptr, flag) \
+ slurm_job_state_unset_flag((job_ptr), (flag), __func__)
/* dump_all_job_state - save the state of all jobs to file
* RET 0 or error code */
extern int dump_all_job_state ( void );
+/* Call on_job_state_change() instead */
+extern void slurm_on_job_state_change(job_record_t *job_ptr, uint32_t new_state,
+ const char *caller);
+
/*
* Notify/update job state hash table that job state has changed
* IN job_ptr - Job about to be updated
* IN new_state - New value that will be assigned to job_ptr->job_state.
* If NO_VAL, then delete the cache entry.
*/
-extern void on_job_state_change(job_record_t *job_ptr, uint32_t new_state);
+#define on_job_state_change(job_ptr, new_state) \
+ slurm_on_job_state_change((job_ptr), (new_state), __func__)
/* dump_all_node_state - save the state of all nodes to file */
extern int dump_all_node_state ( void );
@@ -1581,12 +1601,6 @@
*/
extern void rehash_jobs(void);
-/*
- * Setup and prepare job state cache (if configured)
- * IN new_hash_table_size - number of entries in hash table
- */
-extern void setup_job_state_hash(int new_hash_table_size);
-
/* update first assigned job id as needed on reconfigure */
extern void reset_first_job_id(void);
diff --git a/src/slurmctld/state_save.c b/src/slurmctld/state_save.c
index d4ae016..eb73c78 100644
--- a/src/slurmctld/state_save.c
+++ b/src/slurmctld/state_save.c
@@ -45,7 +45,13 @@
#include <pthread.h>
+#include "src/common/log.h"
#include "src/common/macros.h"
+#include "src/common/probes.h"
+#include "src/common/slurm_protocol_defs.h"
+#include "src/common/slurm_time.h"
+#include "src/common/timers.h"
+
#include "src/slurmctld/reservation.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/trigger_mgr.h"
@@ -55,11 +61,24 @@
#define SAVE_MAX_WAIT 5
#endif
+#define SAVE_COUNT_DELAY \
+ ((timespec_t) { \
+ .tv_sec = 1, \
+ })
+#define STATESAVE_WARN_TS \
+ ((timespec_t) { \
+ .tv_nsec = (NSEC_IN_SEC / 2), \
+ })
+#define CTIME_STR_LEN 72
+
static pthread_mutex_t state_save_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t state_save_cond = PTHREAD_COND_INITIALIZER;
static int save_jobs = 0, save_nodes = 0, save_parts = 0;
static int save_triggers = 0, save_resv = 0;
static bool run_save_thread = true;
+static latency_histogram_t save_histogram = LATENCY_HISTOGRAM_INITIALIZER;
+static timespec_t last_save = { 0, 0 };
+static timespec_t save_start = { 0, 0 };
/* Queue saving of job state information */
extern void schedule_job_save(void)
@@ -115,6 +134,71 @@
slurm_mutex_unlock(&state_save_lock);
}
+static void _check_slow_save(void)
+{
+ timespec_diff_ns_t tdiff = { { 0 } };
+ bool warn = false;
+ char delay_str[CTIME_STR_LEN] = { 0 };
+
+ tdiff = timespec_diff_ns(last_save, save_start);
+ xassert(tdiff.after);
+
+ latency_metric_add_histogram_value(&save_histogram, tdiff.diff);
+
+ if (!(warn = timespec_is_after(tdiff.diff, STATESAVE_WARN_TS)))
+ return;
+
+ (void) timespec_ctime(tdiff.diff, false, delay_str, sizeof(delay_str));
+	warning("Saving to StateSaveLocation took %s. Please check backing filesystem as all Slurm operations are delayed due to slow StateSaveLocation writes.",
+		delay_str);
+}
+
+static void _probe_verbose(probe_log_t *log)
+{
+ char histogram[LATENCY_METRIC_HISTOGRAM_STR_LEN] = { 0 };
+ char ts_str[CTIME_STR_LEN] = { 0 };
+
+ if (timespec_is_after(save_start, last_save)) {
+ (void) timespec_ctime(save_start, true, ts_str, sizeof(ts_str));
+ probe_log(log, "StateSave Status: SAVING");
+ probe_log(log, "StateSave Started: %s", ts_str);
+ } else {
+ (void) timespec_ctime(timespec_rem(last_save, save_start),
+ false, ts_str, sizeof(ts_str));
+ probe_log(log, "StateSave Status: SLEEPING");
+ probe_log(log, "StateSave Last Duration: %s", ts_str);
+ }
+
+ (void) timespec_ctime(last_save, true, ts_str, sizeof(ts_str));
+ probe_log(log, "StateSave Last Save: %s", ts_str);
+
+ (void) latency_histogram_print_labels(histogram, sizeof(histogram));
+ probe_log(log, "StateSave Histogram: %s", histogram);
+
+ (void) latency_histogram_print(&save_histogram, histogram,
+ sizeof(histogram));
+ probe_log(log, "StateSave Histogram: %s", histogram);
+}
+
+static probe_status_t _probe(probe_log_t *log)
+{
+ probe_status_t status = PROBE_RC_UNKNOWN;
+
+ slurm_mutex_lock(&state_save_lock);
+
+ if (log)
+ _probe_verbose(log);
+
+ if (!last_save.tv_sec)
+ status = PROBE_RC_ONLINE;
+ else
+ status = PROBE_RC_READY;
+
+ slurm_mutex_unlock(&state_save_lock);
+
+ return status;
+}
+
/*
* Run as pthread to keep saving slurmctld state information as needed,
* Use schedule_job_save(), schedule_node_save(), and schedule_part_save()
@@ -124,8 +208,6 @@
*/
extern void *slurmctld_state_save(void *no_data)
{
- time_t last_save = 0, now;
- double save_delay;
bool run_save;
int save_count;
@@ -135,34 +217,47 @@
}
#endif
+ probe_register(__func__, _probe);
+
while (1) {
/* wait for work to perform */
slurm_mutex_lock(&state_save_lock);
while (1) {
+ timespec_diff_ns_t save_delay = { { 0 } };
+
+ if (last_save.tv_sec) {
+ save_delay = timespec_diff_ns(timespec_now(),
+ save_start);
+ xassert(save_delay.after);
+ }
+
save_count = save_jobs + save_nodes + save_parts +
save_resv + save_triggers;
- now = time(NULL);
- save_delay = difftime(now, last_save);
+
if (save_count &&
- (!run_save_thread ||
- (save_delay >= SAVE_MAX_WAIT))) {
- last_save = now;
+ (!run_save_thread || !last_save.tv_sec ||
+ (timespec_to_secs(save_delay.diff) >=
+ SAVE_MAX_WAIT))) {
break; /* do the work */
} else if (!run_save_thread) {
run_save_thread = true;
slurm_mutex_unlock(&state_save_lock);
return NULL; /* shutdown */
} else if (save_count) { /* wait for a timeout */
- struct timespec ts = {0, 0};
- ts.tv_sec = now + 1;
+ timespec_t delay =
+ timespec_add(timespec_now(),
+ SAVE_COUNT_DELAY);
+
slurm_cond_timedwait(&state_save_cond,
- &state_save_lock, &ts);
- } else { /* wait for more work */
+ &state_save_lock, &delay);
+ } else { /* wait for more work */
slurm_cond_wait(&state_save_cond,
&state_save_lock);
}
}
+ save_start = timespec_now();
+
/* save job info if necessary */
run_save = false;
/* slurm_mutex_lock(&state_save_lock); done above */
@@ -217,5 +312,10 @@
slurm_mutex_unlock(&state_save_lock);
if (run_save)
(void)trigger_state_save();
+
+ slurm_mutex_lock(&state_save_lock);
+ last_save = timespec_now();
+ _check_slow_save();
+ slurm_mutex_unlock(&state_save_lock);
}
}
diff --git a/src/slurmd/slurmd/job_mem_limit.c b/src/slurmd/slurmd/job_mem_limit.c
index 0d13fd0..55dee9f 100644
--- a/src/slurmd/slurmd/job_mem_limit.c
+++ b/src/slurmd/slurmd/job_mem_limit.c
@@ -129,34 +129,34 @@
{
step_loc_t *stepd = x;
int fd;
- slurmstepd_mem_info_t stepd_mem_info;
+ uint64_t job_mem_limit = 0;
fd = stepd_connect(stepd->directory, stepd->nodename, &stepd->step_id,
&stepd->protocol_version);
if (fd == -1)
return 1; /* step completed */
- if (stepd_get_mem_limits(fd, stepd->protocol_version,
- &stepd_mem_info) != SLURM_SUCCESS) {
+ if (stepd_get_mem_limit(fd, stepd->protocol_version, &job_mem_limit) !=
+ SLURM_SUCCESS) {
error("Error reading %ps memory limits from slurmstepd",
&stepd->step_id);
close(fd);
return 1;
}
- if (stepd_mem_info.job_mem_limit) {
+ if (job_mem_limit) {
job_mem_limits_t *limits =
list_find_first(job_limits_list, _match_job,
&stepd->step_id.job_id);
if (limits) {
- if (stepd_mem_info.job_mem_limit > limits->job_mem)
- limits->job_mem = stepd_mem_info.job_mem_limit;
+ if (job_mem_limit > limits->job_mem)
+ limits->job_mem = job_mem_limit;
} else {
/* create entry for this step */
limits = xmalloc(sizeof(*limits));
limits->job_id = stepd->step_id.job_id;
- limits->job_mem = stepd_mem_info.job_mem_limit;
+ limits->job_mem = job_mem_limit;
debug2("%s: RecLim JobId=%u job_mem:%"PRIu64,
__func__, stepd->step_id.job_id, limits->job_mem);
list_append(job_limits_list, limits);
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index b847bd5..e604772 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -822,7 +822,6 @@
static int _handle_mem_limits(int fd, uid_t uid, pid_t remote_pid)
{
safe_write(fd, &step->job_mem, sizeof(uint64_t));
- safe_write(fd, &step->step_mem, sizeof(uint64_t));
return SLURM_SUCCESS;
rwfail:
@@ -2357,13 +2356,20 @@
slurm_mutex_unlock(&extern_thread_lock);
for (int i = 0; i < thread_cnt; i++) {
+ pthread_t thread = 0;
debug2("Joining extern pid thread %d", i);
- slurm_thread_join(extern_threads[i]);
+
+ slurm_mutex_lock(&extern_thread_lock);
+ SWAP(thread, extern_threads[i]);
+ extern_thread_cnt--;
+ slurm_mutex_unlock(&extern_thread_lock);
+
+ slurm_thread_join(thread);
}
slurm_mutex_lock(&extern_thread_lock);
xfree(extern_threads);
- extern_thread_cnt = 0;
+ xassert(!extern_thread_cnt);
slurm_mutex_unlock(&extern_thread_lock);
debug2("Done joining extern pid threads");
diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c
index f478023..1b75f5d 100644
--- a/src/slurmd/slurmstepd/slurmstepd.c
+++ b/src/slurmd/slurmstepd/slurmstepd.c
@@ -847,6 +847,45 @@
return 0;
}
+/* do nothing */
+static void _ns_on_sigchld(int signo) {}
+
+/*
+ * Reap all adopted processes forever
+ *
+ * If creating new PID namespaces with namespaces/linux, processes that
+ * terminate inside the new PID namespace may become children of this
+ * slurmstepd. Such processes that would normally be reaped by the original init
+ * process now need to be reaped by this slurmstepd.
+ */
+static void _reap_adopted_processes(void)
+{
+ struct sigaction sa = {
+ .sa_handler = _ns_on_sigchld,
+ };
+ sigset_t mask;
+
+ /* Unblock SIGCHLD if sigmask was inherited with it blocked */
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+ pthread_sigmask(SIG_UNBLOCK, &mask, NULL);
+
+ /*
+ * Override default ignore behavior for SIGCHLD by adding empty handler
+ * function.
+ */
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGCHLD, &sa, NULL);
+
+ while (true) {
+ /* Block until SIGCHLD (or any signal) is received */
+ pause();
+ /* Cleanup any terminated processes (if any) */
+ while (waitpid(-1, NULL, WNOHANG) > 0)
+ ;
+ }
+}
+
/*
* Process special "modes" of slurmstepd passed as cmdline arguments.
*/
@@ -870,7 +909,9 @@
setproctitle("%s", buf);
xfree(buf);
set_oom_adj(STEPD_OOM_ADJ);
- (void) poll(NULL, 0, -1);
+
+ _reap_adopted_processes();
+
fini_setproctitle();
exit(0);
}
@@ -927,7 +968,7 @@
safe_read(sock, &ok, sizeof(int));
return;
rwfail:
- error("Unable to receive \"ok ack\" to slurmd");
+ error("Unable to receive \"ok ack\" from slurmd");
#endif
}
diff --git a/testsuite/README b/testsuite/README
index c6ff847..dfbc7df 100644
--- a/testsuite/README
+++ b/testsuite/README
@@ -1088,6 +1088,10 @@
test_152_1 Test IMEX channel allocation/deallocation
test_152_2 Test --network=unique-channel-per-segment
+test_153_# Testing Metrics/Type.
+========================================
+test_153_1 Test basic metrics/openmetrics
+
test_154_# Testing HRes.
========================================
test_154_1 Test HRes MODE_3
diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91
index f78d94b..baa28b4 100755
--- a/testsuite/expect/test1.91
+++ b/testsuite/expect/test1.91
@@ -217,14 +217,16 @@
# Run a job step to get allocated processor count
#
set task_cnt 0
-send "$srun -c1 ./$file_prog\r"
+send "$srun -c1 ./$file_prog; echo EXIT_CODE:$?\r"
expect {
-re "TASK_ID:($number),MASK:($number)" {
incr task_cnt
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "salloc not responding or failure to recognize prompt"
@@ -241,14 +243,16 @@
#
set expected_mask [ expr ((1 << $task_cnt) - 1) ]
set task_mask 0
-send "$srun -c1 -n $task_cnt --cpu-bind=threads ./$file_prog\r"
+send "$srun -c1 -n $task_cnt --cpu-bind=threads ./$file_prog; echo EXIT_CODE:$?\r"
expect {
-re "TASK_ID:($number),MASK:($number)" {
incr task_mask $expect_out(2,string)
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "srun (from --allocate) not responding or failure to recognize prompt"
@@ -276,14 +280,16 @@
log_debug "core_mask: $expected_mask"
set task_mask 0
-send "$srun -c1 -n $core_task_cnt -B 1:1:1 ./$file_prog\r"
+send "$srun -c1 -n $core_task_cnt -B 1:1:1 ./$file_prog; echo EXIT_CODE:$?\r"
expect {
-re "TASK_ID:($number),MASK:($number)" {
incr task_mask $expect_out(2,string)
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "srun (from --allocate) not responding or failure to recognize prompt"
@@ -306,7 +312,7 @@
set num_tasks 0
set num_bits 0
set task_mask 0
- send "$srun -B $this_cnt-$this_cnt:$num_cores:$num_threads ./$file_prog\r"
+ send "$srun -B $this_cnt-$this_cnt:$num_cores:$num_threads ./$file_prog; echo EXIT_CODE:$?\r"
expect {
-re "TASK_ID:($number),MASK:($number)" {
incr task_mask $expect_out(2,string)
@@ -321,8 +327,10 @@
}
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "srun (from --allocate) not responding or failure to recognize prompt"
@@ -350,7 +358,7 @@
set num_tasks 0
set num_bits 0
set task_mask 0
- send "$srun -B $num_sockets:$this_cnt-$this_cnt:$num_threads ./$file_prog\r"
+ send "$srun -B $num_sockets:$this_cnt-$this_cnt:$num_threads ./$file_prog; echo EXIT_CODE:$?\r"
expect {
-re "TASK_ID:($number),MASK:($number)" {
incr task_mask $expect_out(2,string)
@@ -365,8 +373,10 @@
}
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "salloc not responding or failure to recognize prompt"
@@ -394,7 +404,7 @@
set num_tasks 0
set num_bits 0
set task_mask 0
- send "$srun -B $num_sockets:$num_cores:$this_cnt-$this_cnt ./$file_prog\r"
+ send "$srun -B $num_sockets:$num_cores:$this_cnt-$this_cnt ./$file_prog; echo EXIT_CODE:$?\r"
expect {
-re "TASK_ID:($number),MASK:($number)" {
incr task_mask $expect_out(2,string)
@@ -409,8 +419,10 @@
}
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "salloc not responding or failure to recognize prompt"
@@ -437,7 +449,7 @@
set num_tasks 0
set num_bits 0
set task_mask 0
- send "$srun -c$this_cnt -B 1:1:1 ./$file_prog\r"
+ send "$srun -c$this_cnt -B 1:1:1 ./$file_prog; echo EXIT_CODE:$?\r"
expect {
-re "TASK_ID:($number),MASK:($number)" {
incr task_mask $expect_out(2,string)
@@ -452,8 +464,10 @@
}
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "salloc not responding or failure to recognize prompt"
@@ -477,14 +491,16 @@
#
set expected_mask [ expr ((1 << $task_cnt) - 1) ]
set task_mask 0
-send "$srun -n $task_cnt -m plane=4 ./$file_prog\r"
+send "$srun -n $task_cnt -m plane=4 ./$file_prog; echo EXIT_CODE:$?\r"
expect {
-re "TASK_ID:($number),MASK:($number)" {
incr task_mask $expect_out(2,string)
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "salloc not responding or failure to recognize prompt"
@@ -504,9 +520,6 @@
#
send "exit\r"
expect {
- -re "error" {
- fail "Some error occurred"
- }
timeout {
fail "salloc not responding or failure to recognize prompt"
}
diff --git a/testsuite/expect/test1.92 b/testsuite/expect/test1.92
index adfbfa6..c710698 100755
--- a/testsuite/expect/test1.92
+++ b/testsuite/expect/test1.92
@@ -61,7 +61,7 @@
set task_cnt 0
set prev_node -1
set node_cnt 0
-spawn $bin_bash -c "$srun $srun_args -l -c1 $file_bash | $bin_sort -V"
+spawn $bin_bash -c "$srun $srun_args -l -c1 $file_bash | $bin_sort -V; echo EXIT_CODE:$?"
expect {
-re "nodeid:($number) taskid:($number)" {
set this_node $expect_out(1,string)
@@ -73,8 +73,10 @@
}
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "salloc not responding or failure to recognize prompt"
@@ -96,7 +98,7 @@
set this_cnt 0
set prev_node -1
set this_node -1
-spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m block $file_bash | $bin_sort -V"
+spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m block $file_bash | $bin_sort -V; echo EXIT_CODE:$?"
expect {
-re "nodeid:($number) taskid:($number) localid:($number)" {
set this_node $expect_out(1,string)
@@ -112,8 +114,10 @@
}
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "srun not responding or failure to recognize prompt"
@@ -131,7 +135,7 @@
set prev_node -1
set this_node -1
set prev_cnt $block_size
-spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m cyclic $file_bash | $bin_sort -V"
+spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m cyclic $file_bash | $bin_sort -V; echo EXIT_CODE:$?"
expect {
-re "nodeid:($number) taskid:($number) localid:($number)" {
set this_node $expect_out(1,string)
@@ -147,8 +151,10 @@
}
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "srun not responding or failure to recognize prompt"
@@ -166,7 +172,7 @@
set prev_node -1
set this_node -1
set prev_cnt $block_size
-spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m plane=$block_size $file_bash | $bin_sort -V"
+spawn $bin_bash -c "$srun $srun_args -l -n $task_cnt -m plane=$block_size $file_bash | $bin_sort -V; echo EXIT_CODE:$?"
expect {
-re "nodeid:($number) taskid:($number) localid:($number)" {
set this_node $expect_out(1,string)
@@ -182,8 +188,10 @@
}
exp_continue
}
- -re "error" {
- fail "Some error occurred"
+ -re "EXIT_CODE:(\d+)" {
+ if {[$expect_out(1,string)] != 0} {
+ fail "Some error occurred"
+ }
}
timeout {
fail "srun not responding or failure to recognize prompt"
diff --git a/testsuite/expect/test15.27 b/testsuite/expect/test15.27
index cbdf2ec..57de92c 100755
--- a/testsuite/expect/test15.27
+++ b/testsuite/expect/test15.27
@@ -72,6 +72,11 @@
skip "Test invalid without slurmdbd"
}
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+ [package vcompare [get_version] "25.11.0"] < 0 } {
+ skip "In 25.11 we fixed some issues with stepmgr that were leading to random fails in this test."
+}
+
set node_name [get_nodes_by_request "--gres=gpu:2 -n1 -t1"]
if { [llength $node_name] != 1 } {
skip "This test need to be able to submit jobs with at least --gres=gpu:2"
diff --git a/testsuite/expect/test15.37 b/testsuite/expect/test15.37
index 55b7935..2a63f01 100755
--- a/testsuite/expect/test15.37
+++ b/testsuite/expect/test15.37
@@ -30,6 +30,11 @@
set job_list [list]
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+ [package vcompare [get_version] "25.11.0"] < 0 } {
+ skip "In 25.11 we fixed some issues with stepmgr that were leading to random fails in this test."
+}
+
#
# Get the node to use
#
diff --git a/testsuite/expect/test17.12 b/testsuite/expect/test17.12
index 4839db3..d0c84f5 100755
--- a/testsuite/expect/test17.12
+++ b/testsuite/expect/test17.12
@@ -51,6 +51,11 @@
#
# Prerequisites
#
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+ [package vcompare [get_version] "25.11.0"] < 0 } {
+ skip "Ticket 23611: In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
if {![is_super_user] || [get_config_param "NodeFeaturesPlugins"] ne "(null)"} {
skip "Test needs super user and not NodeFeaturesPlugins"
}
diff --git a/testsuite/expect/test17.17 b/testsuite/expect/test17.17
index 08784a5..0b24464 100755
--- a/testsuite/expect/test17.17
+++ b/testsuite/expect/test17.17
@@ -44,6 +44,11 @@
cancel_job [list $job_id1 $job_id2]
}
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+ [package vcompare [get_version] "25.11.0"] < 0 } {
+ skip "In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
set node_name [get_nodes_by_request "--gres=gpu:2 -n1 -t1"]
if { [llength $node_name] != 1 } {
skip "This test need to be able to submit jobs with at least --gres=gpu:2"
diff --git a/testsuite/expect/test17.60 b/testsuite/expect/test17.60
index 1448c36..d2a4f9b 100755
--- a/testsuite/expect/test17.60
+++ b/testsuite/expect/test17.60
@@ -30,6 +30,11 @@
set job_list [list]
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+ [package vcompare [get_version] "25.11.0"] < 0 } {
+ skip "In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
#
# Get the node to use
#
diff --git a/testsuite/expect/test39.5 b/testsuite/expect/test39.5
index 3da06ef..d83c2c3 100755
--- a/testsuite/expect/test39.5
+++ b/testsuite/expect/test39.5
@@ -81,6 +81,11 @@
skip "This test is only compatible with select/cons_tres"
}
+if {[param_contains [get_config_param "SlurmctldParameters"] "enable_stepmgr"] && \
+ [package vcompare [get_version] "25.11.0"] < 0 } {
+ skip "In 25.11 we fixed some issues with stepmgr that were leading to random failures in this test."
+}
+
set nb_nodes [get_partition_param [default_partition] "TotalNodes"]
if {$nb_nodes > 1} {
set nb_nodes 2
diff --git a/testsuite/expect/test9.2 b/testsuite/expect/test9.2
index b0bfcad..ae6d1fc 100755
--- a/testsuite/expect/test9.2
+++ b/testsuite/expect/test9.2
@@ -33,55 +33,28 @@
set file_out "$test_dir/output"
set job_name $test_name
-
-set cycle_count [get_cycle_count]
-set task_cnt $max_stress_tasks
-set node_cnt 1-4
+set cycle_count 2
+set task_cnt [get_total_cpus]
+set node_cnt [llength [get_nodes_by_state]]
set other_opts "-O"
-# Execute an srun job to print hostname to output_file with task_cnt tasks per node,
-# wait for completion
-# Returns 0 on successful completion, returns 1 otherwise
-proc run_hostname_job { output_file } {
- global bin_printenv bin_rm job_name number srun node_cnt other_opts
- global task_cnt timeout
- exec $bin_rm -f $output_file
-
- spawn $srun --job-name=$job_name --input=/dev/null --output=$output_file --error=/dev/null -n$task_cnt -N$node_cnt $other_opts -t1 $bin_printenv SLURMD_NODENAME
- expect {
- -re "Unable to contact" {
- fail "Slurm appears to be down"
- }
- timeout {
- fail "srun not responding"
- }
- eof {
- wait
- }
- }
-
- wait_for_file -fail $output_file
-}
+log_info "Using $task_cnt tasks"
#
# Run cycle_count jobs to run "hostname" and check output file size
#
-set success_cnt 0
-set timeout $max_job_delay
for {set inx 0} {$inx < $cycle_count} {incr inx} {
- run_hostname_job $file_out
- set stdout_lines [get_line_cnt $file_out]
- if {$stdout_lines != $task_cnt} {
- exec $bin_sleep 1
+ # Remove files from previous cycle
+ exec $bin_rm -f $file_out
+
+ # Run a job that prints the node name once per task
+ run_command -fail "$srun --job-name=$job_name --input=/dev/null --output=$file_out --error=/dev/null -n$task_cnt -N$node_cnt $other_opts -t1 $bin_printenv SLURMD_NODENAME"
+
+ # Test output file size
+ set stdout_lines 0
+ wait_for_file $file_out
+ wait_for {$stdout_lines == $task_cnt} {
set stdout_lines [get_line_cnt $file_out]
}
- if {$stdout_lines != $task_cnt} {
- if {$stdout_lines == 0} {
- fail "stdout is empty. Is current working directory writable from compute nodes?"
- } else {
- fail "stdout is incomplete"
- }
- } else {
- incr success_cnt
- }
+ subtest {$stdout_lines == $task_cnt} "Output file should have one line per task" "$stdout_lines != $task_cnt"
}
diff --git a/testsuite/python/check/common/test_timespec_t.c b/testsuite/python/check/common/test_timespec_t.c
index 6a76c77..dd17ae4 100644
--- a/testsuite/python/check/common/test_timespec_t.c
+++ b/testsuite/python/check/common/test_timespec_t.c
@@ -67,16 +67,16 @@
ck_assert(y.tv_nsec == 5);
ck_assert(y.tv_sec == 5);
- x = (timespec_t) { 10, (10 * NSEC_IN_SEC) };
- y = (timespec_t) { 5, (5 * NSEC_IN_SEC) };
+ x = (timespec_t) { 10, ((2 * NSEC_IN_SEC) + 150) };
+ y = (timespec_t) { 5, ((1 * NSEC_IN_SEC) + 88) };
x = timespec_normalize(x);
- ck_assert(x.tv_nsec == 20);
- ck_assert(x.tv_sec == 0);
+ ck_assert(x.tv_sec == 12);
+ ck_assert(x.tv_nsec == 150);
y = timespec_normalize(y);
- ck_assert(y.tv_nsec == 10);
- ck_assert(y.tv_sec == 0);
+ ck_assert(y.tv_sec == 6);
+ ck_assert(y.tv_nsec == 88);
}
END_TEST
@@ -115,6 +115,10 @@
START_TEST(test_rem)
{
+#if SLURM_VERSION_NUMBER < SLURM_VERSION_NUM(26, 5, 0)
+ printf("XFAIL: Issue #50096: timespec_t - Math operations fail cross negatives\n");
+#endif
+
timespec_t x = { 10, 4 };
timespec_t y = { 5, 2 };
timespec_t t1 = { 0 }, t2 = { 0 }, t3 = { 0 };
@@ -123,18 +127,13 @@
ck_assert(t1.tv_sec == 5);
ck_assert(t1.tv_nsec == 2);
- /* Negative math is rejected currently */
t2 = timespec_rem(y, x);
- //ck_assert(t2.tv_sec == -5);
- //ck_assert(t2.tv_nsec == -2);
- ck_assert(t2.tv_sec == 0);
- ck_assert(t2.tv_nsec == 0);
+ ck_assert(t2.tv_sec == -5);
+ ck_assert(t2.tv_nsec == -2);
- t3 = timespec_rem(t1, t2);
- //ck_assert(t3.tv_sec == 0);
- //ck_assert(t3.tv_nsec == 0);
- ck_assert(t3.tv_sec == 5);
- ck_assert(t3.tv_nsec == 2);
+ t3 = timespec_add(t1, t2);
+ ck_assert(t3.tv_sec == 0);
+ ck_assert(t3.tv_nsec == 0);
}
END_TEST
diff --git a/testsuite/python/conftest.py b/testsuite/python/conftest.py
index 9746d61..8d4aaf7 100644
--- a/testsuite/python/conftest.py
+++ b/testsuite/python/conftest.py
@@ -234,6 +234,34 @@
quiet=True,
)
+ # Backup current SysConfDir
+ if os.path.exists(atf.properties["slurm-config-dir"]):
+ if os.path.exists(atf.properties["slurm-config-dir"] + name):
+ logging.warning(
+ f"Backup for SysConfigDir already exists ({atf.properties['slurm-config-dir']+name}). Removing it."
+ )
+ atf.run_command(
+ f"rm -rf {atf.properties['slurm-config-dir']+name}",
+ user="root",
+ quiet=True,
+ )
+ atf.run_command(
+ f"rsync -a --delete {atf.properties['slurm-config-dir']}/ {atf.properties['slurm-config-dir']+name}/",
+ user="root",
+ quiet=True,
+ )
+ # Setup a fresh SysConfDir based on slurm-config-orig-dir
+ if os.path.exists(atf.properties["slurm-config-orig-dir"]):
+ atf.run_command(
+ f"sudo rsync -a --delete {atf.properties['slurm-config-orig-dir']}/ {atf.properties['slurm-config-dir']}/",
+ quiet=True,
+ fatal=True,
+ )
+ else:
+ logging.warning(
+ f"Base SysConfDir ({atf.properties['slurm-config-orig-dir']}) doesn't exists. Using current SysConfDir as the Base one."
+ )
+
# Create the required node directories for node0
node_name = "node0"
spool_dir = atf.properties["slurm-spool-dir"].replace("%n", node_name)
@@ -281,6 +309,23 @@
quiet=True,
)
+ # Restore SysConfDir
+ atf.run_command(
+ f"rm -rf {atf.properties['slurm-config-dir']}", user="root", quiet=True
+ )
+ if os.path.exists(
+ atf.properties["slurm-config-dir"] + atf.properties["test_name"]
+ ):
+ atf.run_command(
+ f"mv {atf.properties['slurm-config-dir']+atf.properties['test_name']} {atf.properties['slurm-config-dir']}",
+ user="root",
+ quiet=True,
+ )
+ else:
+ logging.warning(
+ "SysConfDir backup doesn't exists, but it should be created in the module_setup fixture."
+ )
+
# Remove Nodes directories:
if "nodes" not in atf.properties:
atf.properties["nodes"] = ["node0"]
@@ -317,10 +362,6 @@
fatal=True,
)
- # Restore any backed up configuration files
- for config in set(atf.properties["configurations-modified"]):
- atf.restore_config_file(config)
-
# Clean influxdb
if atf.properties["influxdb-started"]:
atf.request_influxdb(f"DROP DATABASE {atf.properties['influxdb_db']}")
diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py
index 9fe5080..0452bb8 100644
--- a/testsuite/python/lib/atf.py
+++ b/testsuite/python/lib/atf.py
@@ -28,6 +28,7 @@
import json
import jsondiff
+import jsonpatch
import yaml
# This module will be (un)imported in require_openapi_generator()
@@ -1436,6 +1437,76 @@
)
+def get_deprecated_openapi_spec_patch(openapi_spec, undeprecate=False):
+ """
+ Returns a JsonPatch object that can be applied to the openapi_spec to
+ fully deprecate it, as we should do for older specs in newer Slurm
+ versions.
+
+ If undeprecate is set to True, then it does the opposite, it returns a
+ JsonPatch to remove all deprecate marks of the openapi_specs. This
+ should NOT be used in general, but as we generated some openapi_specs
+ with newer Slurm versions, we may need to undeprecate them (see v41).
+ """
+ ops = []
+
+ def _escape_json_pointer(s: str) -> str:
+ return s.replace("~", "~0").replace("/", "~1")
+
+ for raw_path, path_obj in openapi_spec.get("paths", {}).items():
+ escaped_path = _escape_json_pointer(raw_path)
+
+ for method, operation in path_obj.items():
+ if method.lower() not in (
+ "get",
+ "post",
+ "put",
+ "delete",
+ "patch",
+ "head",
+ "options",
+ ):
+ continue
+
+ # 1. Mark the operation itself as deprecated
+ if not undeprecate:
+ ops.append(
+ {
+ "op": "add",
+ "path": f"/paths/{escaped_path}/{method}/deprecated",
+ "value": True,
+ }
+ )
+ else:
+ ops.append(
+ {
+ "op": "remove",
+ "path": f"/paths/{escaped_path}/{method}/deprecated",
+ }
+ )
+
+ # 2. Mark every parameter as deprecated
+ params = operation.get("parameters", [])
+ for i, _ in enumerate(params):
+ if not undeprecate:
+ ops.append(
+ {
+ "op": "add",
+ "path": f"/paths/{escaped_path}/{method}/parameters/{i}/deprecated",
+ "value": True,
+ }
+ )
+ else:
+ ops.append(
+ {
+ "op": "remove",
+ "path": f"/paths/{escaped_path}/{method}/parameters/{i}/deprecated",
+ }
+ )
+
+ return jsonpatch.JsonPatch(ops)
+
+
def assert_openapi_spec_eq(spec_a, spec_b):
"""
Asserts that two OpenAPI specifications are equal, ignoring some info and
@@ -1606,126 +1677,6 @@
)
-def backup_config_file(config="slurm.conf"):
- """Backs up a configuration file.
-
- This function may only be used in auto-config mode.
-
- Args:
- config (string): Name of the config file to back up.
-
- Returns:
- None
-
- Example:
- >>> backup_config_file('slurm.conf')
- >>> backup_config_file('gres.conf')
- >>> backup_config_file('cgroup.conf')
- """
-
- if not properties["auto-config"]:
- require_auto_config(f"wants to modify the {config} file")
-
- properties["configurations-modified"].add(config)
-
- config_file = f"{properties['slurm-config-dir']}/{config}"
- backup_config_file = f"{config_file}.orig-atf"
-
- # If a backup already exists, issue a warning and return (honor existing backup)
- if os.path.isfile(backup_config_file):
- logging.trace(f"Backup file already exists ({backup_config_file})")
- return
-
- # If the file to backup does not exist, touch an empty backup file with
- # the sticky bit set. restore_config_file will remove the file.
- if not os.path.isfile(config_file):
- run_command(
- f"touch {backup_config_file}",
- user=properties["slurm-user"],
- fatal=True,
- quiet=True,
- )
- run_command(
- f"chmod 1000 {backup_config_file}",
- user=properties["slurm-user"],
- fatal=True,
- quiet=True,
- )
-
- # Otherwise, copy the config file to the backup
- else:
- run_command(
- f"cp {config_file} {backup_config_file}",
- user=properties["slurm-user"],
- fatal=True,
- quiet=True,
- )
-
-
-def restore_config_file(config="slurm.conf"):
- """Restores a configuration file.
-
- This function may only be used in auto-config mode.
-
- Args:
- config (string): Name of config file to restore.
-
- Returns:
- None
-
- Example:
- >>> restore_config_file('slurm.conf')
- >>> restore_config_file('gres.conf')
- >>> restore_config_file('cgroup.conf')
- """
-
- config_file = f"{properties['slurm-config-dir']}/{config}"
- backup_config_file = f"{config_file}.orig-atf"
-
- properties["configurations-modified"].remove(config)
-
- # If backup file doesn't exist, it has probably already been
- # restored by a previous call to restore_config_file
- if not os.path.isfile(backup_config_file):
- logging.trace(
- f"Backup file does not exist for {config_file}. It has probably already been restored."
- )
- return
-
- # If the sticky bit is set and the file is empty, remove both the file and the backup
- backup_stat = os.stat(backup_config_file)
- if backup_stat.st_size == 0 and backup_stat.st_mode & stat.S_ISVTX:
- run_command(
- f"rm -f {backup_config_file}",
- user=properties["slurm-user"],
- fatal=True,
- quiet=True,
- )
- if os.path.isfile(config_file):
- run_command(
- f"rm -f {config_file}",
- user=properties["slurm-user"],
- fatal=True,
- quiet=True,
- )
-
- # Otherwise, copy backup config file to primary config file
- # and remove the backup (.orig-atf)
- else:
- run_command(
- f"cp {backup_config_file} {config_file}",
- user=properties["slurm-user"],
- fatal=True,
- quiet=True,
- )
- run_command(
- f"rm -f {backup_config_file}",
- user=properties["slurm-user"],
- fatal=True,
- quiet=True,
- )
-
-
def get_config(live=True, source="slurm", quiet=False, delimiter="="):
"""Returns the Slurm configuration as a dictionary.
@@ -1943,9 +1894,6 @@
config_file = f"{properties['slurm-config-dir']}/{config}.conf"
- # This has the side-effect of adding config to configurations-modified
- backup_config_file(f"{config}.conf")
-
# Remove all matching parameters and append the new parameter
lines = []
output = run_command_output(
@@ -2297,7 +2245,6 @@
)
# In auto-config
- backup_config_file(filename)
run_command(
f"cat > {file_path}", input=content, user=properties["slurm-user"], fatal=True
)
@@ -3028,7 +2975,6 @@
)
# Write the config file back out with the modifications
- backup_config_file("slurm.conf")
new_config_string = "\n".join(new_config_lines)
run_command(
f"echo '{new_config_string}' > {config_file}",
@@ -4337,7 +4283,6 @@
new_config_lines.insert(last_node_line_index + 1, node_line)
# Write the config file back out with the modifications
- backup_config_file("slurm.conf")
new_config_string = "\n".join(new_config_lines)
run_command(
f"echo '{new_config_string}' > {config_file}",
@@ -5110,7 +5055,6 @@
)
# Write the config file back out with the modifications
- backup_config_file("slurm.conf")
new_config_string = "\n".join(new_config_lines)
run_command(
f"echo '{new_config_string}' > {config_file}",
@@ -5266,6 +5210,10 @@
if "influxdb_db" in testsuite_config:
properties["influxdb_db"] = testsuite_config["influxdb_db"]
+# TODO: The SlurmConfigDirBase should be passed from the testsuite.conf too,
+# instead of the hardcoded prefix+etc.orig form here.
+properties["slurm-config-orig-dir"] = properties["slurm-prefix"] + "/etc.orig"
+
# Set derived directory properties
# The environment (e.g. PATH, SLURM_CONF) overrides the configuration.
# If the Slurm clients and daemons are not in the current PATH
diff --git a/testsuite/python/tests/test_100_1.py b/testsuite/python/tests/test_100_1.py
index 3e4aefc..61767ba 100644
--- a/testsuite/python/tests/test_100_1.py
+++ b/testsuite/python/tests/test_100_1.py
@@ -55,6 +55,14 @@
"""
# TODO: Remove xfail_tests.append() once their issue is fixed.
+if atf.get_version() < (26, 5):
+ xfail_tests.append(
+ (
+ "common/test_timespec_t.c",
+ "test_rem",
+ "Issue #50096. Math operations fail cross negatives",
+ )
+ )
if atf.get_version() < (25, 11):
skip_tests.append(
(
@@ -82,14 +90,6 @@
"conmgr test doesn't compile for versions < 25.05",
)
)
-else:
- xfail_tests.append(
- (
- "common/test_timespec_t.c",
- "test_normalize",
- "Issue #50096. Math operations fail cross negatives",
- )
- )
# Create a test_function() for all test file in the testsuite_check_dir.
# All the test_function()s will be run by pytest as if actually defined here.
diff --git a/testsuite/python/tests/test_112_41.py b/testsuite/python/tests/test_112_41.py
index 08e1c59..c9a8953 100644
--- a/testsuite/python/tests/test_112_41.py
+++ b/testsuite/python/tests/test_112_41.py
@@ -10,6 +10,8 @@
# import json
# import logging
+import jsonpatch
+
random.seed()
cluster_name = f"test-cluster-{random.randrange(0, 99999999999)}"
@@ -229,9 +231,60 @@
assert "/slurmdb/v0.0.41/jobs/" in spec["paths"].keys()
-@pytest.mark.xfail(reason="Ticket 23807: Schema changed")
@pytest.mark.parametrize("openapi_spec", ["41"], indirect=True)
def test_specification(openapi_spec):
+ base_path = "/components/schemas/v0.0.41_"
+
+ # NOTE: The OpenAPI spec for v41 was generated by slurmrestd 25.11 instead
+ # of its original version back in 24.05. Therefore, changes done
+ # from 24.05 to 25.11 should be reverted from the spec to test
+ # in older versions.
+ if atf.get_version("sbin/slurmrestd") < (25, 11):
+ # In 25.11 we deprecated the whole v41, we need to "undeprecate" it for
+ # older versions
+ patch = atf.get_deprecated_openapi_spec_patch(openapi_spec, undeprecate=True)
+ patch.apply(openapi_spec, in_place=True)
+
+ if atf.get_version("sbin/slurmrestd") < (25, 5):
+ # Issue 50233: In 25.05 we added the delete reservation endpoint.
+ path = "/paths/~1slurm~1v0.0.41~1reservation~1{reservation_name}/delete"
+ patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+ patch.apply(openapi_spec, in_place=True)
+
+ # In 25.05 we removed frontend code
+ path = "/paths/~1slurmdb~1v0.0.41~1cluster~1{cluster_name}/delete/parameters/4/schema/enum/2"
+ patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+ patch.apply(openapi_spec, in_place=True)
+ path = "/paths/~1slurmdb~1v0.0.41~1cluster~1{cluster_name}/get/parameters/4/schema/enum/2"
+ patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+ patch.apply(openapi_spec, in_place=True)
+ path = "/components/schemas/v0.0.41_cluster_rec_flags/items/enum/2"
+ patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+ patch.apply(openapi_spec, in_place=True)
+
+ # In 25.05 we removed node_record_t->alloc_tres_weighted (!881), and
+ # deprecated it in the REST API
+ path = "/components/schemas/v0.0.41_node/properties/tres_weighted/deprecated"
+ patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+ patch.apply(openapi_spec, in_place=True)
+
+ if atf.get_version("sbin/slurmrestd") >= (25, 5):
+ # Ticket 2450: We finally decided to revert deprecating job_submit_req script field
+ path = base_path + "job_submit_req/properties/script/deprecated"
+ patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+ patch.apply(openapi_spec, in_place=True)
+
+ if atf.get_version("sbin/slurmrestd") >= (25, 11):
+ # Ticket 23874: In 25.11 we deprecated parts_packed
+ path = base_path + "stats_msg/properties/parts_packed/deprecated"
+ patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": True}])
+ patch.apply(openapi_spec, in_place=True)
+
+ # Ticket 23874: In 25.11 we converted schedule_cycle_sum to int64
+ path = base_path + "stats_msg/properties/schedule_cycle_sum/format"
+ patch = jsonpatch.JsonPatch([{"op": "replace", "path": path, "value": "int64"}])
+ patch.apply(openapi_spec, in_place=True)
+
atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
diff --git a/testsuite/python/tests/test_112_42.py b/testsuite/python/tests/test_112_42.py
index 3abc519..a17a996 100644
--- a/testsuite/python/tests/test_112_42.py
+++ b/testsuite/python/tests/test_112_42.py
@@ -5,6 +5,7 @@
import pytest
import getpass
import json
+import jsonpatch
import random
import logging
import os
@@ -229,13 +230,59 @@
assert "/slurmdb/v0.0.42/jobs/" in spec["paths"].keys()
-@pytest.mark.xfail(
- atf.get_version("sbin/slurmrestd") >= (25, 11)
- or atf.get_version("sbin/slurmrestd") <= (24, 11, 6),
- reason="Ticket 23807: Schema changed",
-)
@pytest.mark.parametrize("openapi_spec", ["42"], indirect=True)
def test_specification(openapi_spec):
+ base_path = "/components/schemas/v0.0.42_"
+
+ # NOTE: The OpenAPI spec for v42 was generated by slurmrestd 25.11 instead
+ # of its original version back in 24.11. Therefore, changes done
+ # from 24.11 to 25.11 should be reverted from the spec to test
+ # in older versions.
+ if atf.get_version("sbin/slurmrestd") < (25, 5):
+ # Issue 50233: In 25.05 we added the delete reservation endpoint.
+ path = "/paths/~1slurm~1v0.0.42~1reservation~1{reservation_name}/delete"
+ patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+ patch.apply(openapi_spec, in_place=True)
+
+ # In 25.05 we removed frontend code
+ path = "/paths/~1slurmdb~1v0.0.42~1cluster~1{cluster_name}/delete/parameters/4/schema/enum/2"
+ patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+ patch.apply(openapi_spec, in_place=True)
+ path = "/paths/~1slurmdb~1v0.0.42~1cluster~1{cluster_name}/get/parameters/4/schema/enum/2"
+ patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+ patch.apply(openapi_spec, in_place=True)
+ path = "/components/schemas/v0.0.42_cluster_rec_flags/items/enum/2"
+ patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": "FRONT_END"}])
+ patch.apply(openapi_spec, in_place=True)
+
+ # In 25.05 we removed node_record_t->alloc_tres_weighted (!881), and
+ # deprecated it in the REST API
+ path = "/components/schemas/v0.0.42_node/properties/tres_weighted/deprecated"
+ patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+ patch.apply(openapi_spec, in_place=True)
+
+ if atf.get_version("sbin/slurmrestd") >= (25, 5):
+ # Ticket 2450: We finally decided to revert deprecating job_submit_req script field
+ path = base_path + "job_submit_req/properties/script/deprecated"
+ patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+ patch.apply(openapi_spec, in_place=True)
+
+ if atf.get_version("sbin/slurmrestd") >= (25, 11):
+ # Ticket 23874: In 25.11 we deprecated parts_packed
+ path = base_path + "stats_msg/properties/parts_packed/deprecated"
+ patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": True}])
+ patch.apply(openapi_spec, in_place=True)
+
+ # Ticket 23874: In 25.11 we converted schedule_cycle_sum to int64
+ path = base_path + "stats_msg/properties/schedule_cycle_sum/format"
+ patch = jsonpatch.JsonPatch([{"op": "replace", "path": path, "value": "int64"}])
+ patch.apply(openapi_spec, in_place=True)
+
+ if atf.get_version("sbin/slurmrestd") >= (26, 5):
+ # This is expected to be deprecated in 26.05+
+ patch = atf.get_deprecated_openapi_spec_patch(openapi_spec)
+ patch.apply(openapi_spec, in_place=True)
+
atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
diff --git a/testsuite/python/tests/test_112_43.py b/testsuite/python/tests/test_112_43.py
index 7d4f03d..c2d6db6 100644
--- a/testsuite/python/tests/test_112_43.py
+++ b/testsuite/python/tests/test_112_43.py
@@ -5,6 +5,7 @@
import pytest
import getpass
import json
+import jsonpatch
import random
import logging
import time
@@ -230,12 +231,32 @@
assert "/slurmdb/v0.0.43/jobs/" in spec["paths"].keys()
-@pytest.mark.xfail(
- atf.get_version("sbin/slurmrestd") >= (25, 11),
- reason="Ticket 23807: Schema changed",
-)
@pytest.mark.parametrize("openapi_spec", ["43"], indirect=True)
def test_specification(openapi_spec):
+ base_path = "/components/schemas/v0.0.43_"
+
+ if atf.get_version("sbin/slurmrestd") >= (25, 5):
+ # Ticket 2450: We finally decided to revert deprecating job_submit_req script field
+ path = base_path + "job_submit_req/properties/script/deprecated"
+ patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+ patch.apply(openapi_spec, in_place=True)
+
+ if atf.get_version("sbin/slurmrestd") >= (25, 11):
+ # Ticket 23874: In 25.11 we deprecated parts_packed
+ path = base_path + "stats_msg/properties/parts_packed/deprecated"
+ patch = jsonpatch.JsonPatch([{"op": "add", "path": path, "value": True}])
+ patch.apply(openapi_spec, in_place=True)
+
+ # Ticket 23874: In 25.11 we converted schedule_cycle_sum to int64
+ path = base_path + "stats_msg/properties/schedule_cycle_sum/format"
+ patch = jsonpatch.JsonPatch([{"op": "replace", "path": path, "value": "int64"}])
+ patch.apply(openapi_spec, in_place=True)
+
+ if atf.get_version("sbin/slurmrestd") >= (26, 11):
+ # This is expected to be deprecated in 26.11+
+ patch = atf.get_deprecated_openapi_spec_patch(openapi_spec)
+ patch.apply(openapi_spec, in_place=True)
+
atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
diff --git a/testsuite/python/tests/test_112_44.py b/testsuite/python/tests/test_112_44.py
index 497d24b..5dd8d28 100644
--- a/testsuite/python/tests/test_112_44.py
+++ b/testsuite/python/tests/test_112_44.py
@@ -5,6 +5,7 @@
import pytest
import getpass
import json
+import jsonpatch
import random
import logging
import time
@@ -291,6 +292,19 @@
@pytest.mark.parametrize("openapi_spec", ["44"], indirect=True)
def test_specification(openapi_spec):
+ base_path = "/components/schemas/v0.0.44_"
+
+ if atf.get_version("sbin/slurmrestd") >= (25, 11):
+ # Ticket 2450: We finally decided to revert deprecating job_submit_req script field
+ path = base_path + "job_submit_req/properties/script/deprecated"
+ patch = jsonpatch.JsonPatch([{"op": "remove", "path": path}])
+ patch.apply(openapi_spec, in_place=True)
+
+ if atf.get_version("sbin/slurmrestd") >= (27, 5):
+ # This is expected to be deprecated in 27.05+
+ patch = atf.get_deprecated_openapi_spec_patch(openapi_spec)
+ patch.apply(openapi_spec, in_place=True)
+
atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
diff --git a/testsuite/python/tests/test_112_45.py b/testsuite/python/tests/test_112_45.py
index 82cc32b..623ff8f 100644
--- a/testsuite/python/tests/test_112_45.py
+++ b/testsuite/python/tests/test_112_45.py
@@ -295,6 +295,11 @@
)
@pytest.mark.parametrize("openapi_spec", ["45"], indirect=True)
def test_specification(openapi_spec):
+ if atf.get_version("sbin/slurmrestd") >= (27, 11):
+ # This is expected to be deprecated in 27.11+
+ patch = atf.get_deprecated_openapi_spec_patch(openapi_spec)
+ patch.apply(openapi_spec, in_place=True)
+
atf.assert_openapi_spec_eq(openapi_spec, atf.properties["openapi_spec"])
@@ -1264,8 +1269,8 @@
assert job.user == local_user_name
# Update job in db -- posting to /job/jobid/
- atf.wait_for_job_accounted(jobid1, "End", fatal=True)
- atf.wait_for_job_accounted(jobid2, "End", fatal=True)
+ atf.wait_for_job_accounted(jobid1, "State", "COMPLETED", fatal=True)
+ atf.wait_for_job_accounted(jobid2, "State", "COMPLETED", fatal=True)
atf.run_command(
f"sacctmgr -i mod user {local_cluster_name} set AdminLevel=Admin",
diff --git a/testsuite/python/tests/test_133_1.c b/testsuite/python/tests/test_133_1.c
index 6c3fb66..5008169 100644
--- a/testsuite/python/tests/test_133_1.c
+++ b/testsuite/python/tests/test_133_1.c
@@ -192,7 +192,12 @@
fatal("unable to split forward hostlist");
}
END_TIMER;
+#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(26, 5, 0)
+ /* In #50113 we converted to timespec_t logic with new macros */
et = TIMER_DURATION_USEC();
+#else
+ et = DELTA_TIMER;
+#endif
for (j = 0; j < hl_count; j++) {
hostlist_destroy(sp_hl[j]);
}
diff --git a/testsuite/slurm_unit/backfill/dummy_functions.c b/testsuite/slurm_unit/backfill/dummy_functions.c
index 862ad52..f361030 100644
--- a/testsuite/slurm_unit/backfill/dummy_functions.c
+++ b/testsuite/slurm_unit/backfill/dummy_functions.c
@@ -103,13 +103,15 @@
return 1;
}
-void job_state_set(job_record_t *job_ptr, uint32_t state)
+void slurm_job_state_set(job_record_t *job_ptr, uint32_t state,
+ const char *caller)
{
debug("%s %pJ %u", __func__, job_ptr, state);
job_ptr->job_state = state;
}
-void job_state_unset_flag(job_record_t *job_ptr, uint32_t flag)
+void slurm_job_state_unset_flag(job_record_t *job_ptr, uint32_t flag,
+ const char *caller)
{
uint32_t job_state;