blob: 3f2a7ea3cf1463ef6e29ad309186628da730f176 [file] [log] [blame]
<!--
Copyright (C) 2005-2007 The Regents of the University of California.
Copyright (C) 2008-2011 Lawrence Livermore National Security.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
Written by Morris Jette <jette1@llnl.gov> and Danny Auble <da@llnl.gov>
This file is part of Slurm, a resource management program.
For details, see <https://slurm.schedmd.com/>.
Slurm is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public License along
with Slurm; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-->
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>Slurm System Configuration Tool</title>
<SCRIPT type="text/javascript">
<!--
function print_pair(name,value)
{
	// Emit an active "name=value" config line when a value is present;
	// otherwise emit a commented-out "#name=" placeholder line.
	if (!value)
		return "#" + name + "=";
	return name + "=" + value;
}
function get_field(name,form)
{
	// Read a text input: filled-in fields become "name=value", empty
	// fields become a commented-out "#name=" placeholder line.
	return form.value ? name + "=" + form.value : "#" + name + "=";
}
function get_field2(name,form)
{
	// Like get_field(), but an empty field produces no output at all
	// (used for optional attributes appended inline to a NodeName line).
	if (form.value)
		return name + "=" + form.value;
	return "";
}
function get_radio_field_skipfirst(name,form)
{
	// Scan a radio-button group, deliberately ignoring the first button
	// (index 0), and emit "name=value" for the selected one.
	var i = 1;
	while (i < form.length)
	{
		if (form[i].checked)
			return name + "=" + form[i].value;
		i++;
	}
	// No selection past index 0: emit a commented-out placeholder.
	return "#" + name + "=";
}
function get_radio_value(form)
{
	// Return the value of the checked button in a radio-button group.
	// Fix: previously the function fell off the end (returning
	// undefined) when nothing was checked, so call sites that
	// concatenate directly (e.g. "ProctrackType=proctrack/" + ...)
	// rendered the literal text "undefined".  Returning "" keeps the
	// falsy behavior relied on by print_pair() while producing clean
	// output in the direct-concatenation cases.
	for (var i=0; i < form.length; i++)
	{
		if (form[i].checked)
		{
			return form[i].value
		}
	}
	return ""
}
function get_checkbox_value(form)
{
	// Join the values of all checked boxes into a comma-separated list
	// (e.g. "task/affinity,task/cgroup").  Returns "" if none checked.
	// Fix: comma_list is now declared with var; it was previously an
	// implicit global, which leaks state onto window and throws a
	// ReferenceError under strict mode.
	var comma_list = ""
	for (var i=0; i < form.length; i++)
	{
		if (form[i].checked)
		{
			if (comma_list != "")
				comma_list = comma_list +","
			comma_list = comma_list+form[i].value
		}
	}
	return comma_list
}
function hide_box()
{
	// Hide the popup output box (used by the commented-out popup
	// display path in displayfile()).
	document.getElementById('out_box').style.visibility = 'hidden';
}
function displayfile()
{
// Assemble the generated slurm.conf as a single HTML string from the
// form fields.  Each config line ends in "<br>" because the result is
// rendered as a web page; the user saves it as plain text.
// get_field()/print_pair() emit commented-out "#Name=" placeholders
// for fields left blank, so the output is always a complete template.
var printme = "# slurm.conf file generated by configurator.html.<br>" +
"# Put this file on all nodes of your cluster.<br>" +
"# See the slurm.conf man page for more information.<br>" +
"#<br>" +
get_field("ClusterName",document.config.cluster_name) + "<br>" +
"SlurmctldHost=" + document.config.control_machine.value + "<br>" +
// A second SlurmctldHost line designates the backup controller.
get_field("SlurmctldHost",document.config.backup_controller) + "<br>" +
"# <br>" +
"#DisableRootJobs=NO <br>" +
"#EnforcePartLimits=NO <br>" +
get_field("Epilog",document.config.epilog) + "<br>" +
"#EpilogSlurmctld= <br>" +
"#FirstJobId=1 <br>" +
"#MaxJobId=67043328 <br>" +
"#GresTypes= <br>" +
"#GroupUpdateForce=0 <br>" +
"#GroupUpdateTime=600 <br>" +
"#JobFileAppend=0 <br>" +
"#JobRequeue=1 <br>" +
"#JobSubmitPlugins=lua <br>" +
"#KillOnBadExit=0 <br>" +
"#LaunchType=launch/slurm <br>" +
"#Licenses=foo*4,bar <br>" +
"#MailProg=/bin/mail <br>" +
"#MaxJobCount=10000 <br>" +
"#MaxStepCount=40000 <br>" +
"#MaxTasksPerNode=512 <br>" +
print_pair("MpiDefault",get_radio_value(document.config.mpi_default)) + "<br>" +
"#MpiParams=ports=#-# <br>" +
"#PluginDir= <br>" +
"#PlugStackConfig= <br>" +
"#PrivateData=jobs <br>" +
"ProctrackType=proctrack/" + get_radio_value(document.config.proctrack_type) + "<br>" +
get_field("Prolog",document.config.prolog) + "<br>" +
"#PrologFlags= <br>" +
"#PrologSlurmctld= <br>" +
"#PropagatePrioProcess=0 <br>" +
"#PropagateResourceLimits= <br>" +
"#PropagateResourceLimitsExcept= <br>" +
"#RebootProgram= <br>" +
"ReturnToService=" + get_radio_value(document.config.return_to_service) + "<br>" +
"SlurmctldPidFile=" + document.config.slurmctld_pid_file.value + "<br>" +
"SlurmctldPort=" + document.config.slurmctld_port.value + "<br>" +
"SlurmdPidFile=" + document.config.slurmd_pid_file.value + "<br>" +
"SlurmdPort=" + document.config.slurmd_port.value + "<br>" +
"SlurmdSpoolDir=" + document.config.slurmd_spool_dir.value + "<br>" +
"SlurmUser=" + document.config.slurm_user.value + "<br>" +
"#SlurmdUser=root <br>" +
get_field("SrunEpilog",document.config.srun_epilog) + "<br>" +
get_field("SrunProlog",document.config.srun_prolog) + "<br>" +
"StateSaveLocation=" + document.config.state_save_location.value + "<br>" +
print_pair("SwitchType",get_radio_value(document.config.switch_type)) + "<br>" +
get_field("TaskEpilog",document.config.task_epilog) + "<br>" +
// task_plugin is a checkbox group, so multiple plugins may be listed.
"TaskPlugin=" + get_checkbox_value(document.config.task_plugin) + "<br>" +
get_field("TaskProlog",document.config.task_prolog) + "<br>" +
"#TopologyPlugin=topology/tree <br>" +
"#TmpFS=/tmp <br>" +
"#TrackWCKey=no <br>" +
"#TreeWidth= <br>" +
"#UnkillableStepProgram= <br>" +
"#UsePAM=0 <br>" +
"# <br>" +
"# <br>" +
"# TIMERS <br>" +
"#BatchStartTimeout=10 <br>" +
"#CompleteWait=0 <br>" +
"#EpilogMsgTime=2000 <br>" +
"#GetEnvTimeout=2 <br>" +
"#HealthCheckInterval=0 <br>" +
"#HealthCheckProgram= <br>" +
"InactiveLimit=" + document.config.inactive_limit.value + "<br>" +
"KillWait=" + document.config.kill_wait.value + "<br>" +
"#MessageTimeout=10 <br>" +
"#ResvOverRun=0 <br>" +
"MinJobAge=" + document.config.min_job_age.value + "<br>" +
"#OverTimeLimit=0 <br>" +
"SlurmctldTimeout=" + document.config.slurmctld_timeout.value + "<br>" +
"SlurmdTimeout=" + document.config.slurmd_timeout.value + "<br>" +
"#UnkillableStepTimeout=60 <br>" +
"#VSizeFactor=0 <br>" +
"Waittime=" + document.config.wait_time.value + "<br>" +
"# <br>" +
"# <br>" +
"# SCHEDULING <br>" +
"#DefMemPerCPU=0 <br>" +
"#MaxMemPerCPU=0 <br>" +
"#SchedulerTimeSlice=30 <br>" +
"SchedulerType=sched/" + get_radio_value(document.config.sched_type) + "<br>" +
"SelectType=select/" + get_radio_value(document.config.select_type) + "<br>" +
"# <br>" +
"# <br>" +
"# JOB PRIORITY <br>" +
"#PriorityFlags= <br>" +
"#PriorityType=priority/multifactor <br>" +
"#PriorityDecayHalfLife= <br>" +
"#PriorityCalcPeriod= <br>" +
"#PriorityFavorSmall= <br>" +
"#PriorityMaxAge= <br>" +
"#PriorityUsageResetPeriod= <br>" +
"#PriorityWeightAge= <br>" +
"#PriorityWeightFairshare= <br>" +
"#PriorityWeightJobSize= <br>" +
"#PriorityWeightPartition= <br>" +
"#PriorityWeightQOS= <br>" +
"# <br>" +
"# <br>" +
"# LOGGING AND ACCOUNTING <br>" +
"#AccountingStorageEnforce=0 <br>" +
get_field("AccountingStorageHost",document.config.accounting_storage_host) + "<br>" +
get_field("AccountingStoragePort",document.config.accounting_storage_port) + "<br>" +
print_pair("AccountingStorageType",get_radio_value(document.config.accounting_storage_type)) + "<br>" +
get_field("AccountingStoreFlags",document.config.acctng_store_flags) + "<br>" +
get_field("JobCompHost",document.config.job_comp_host) + "<br>" +
get_field("JobCompLoc",document.config.job_comp_loc) + "<br>" +
get_field("JobCompParams",document.config.job_comp_params) + "<br>" +
get_field("JobCompPass",document.config.job_comp_pass) + "<br>" +
get_field("JobCompPort",document.config.job_comp_port) + "<br>" +
"JobCompType=jobcomp/" + get_radio_value(document.config.job_comp_type) + "<br>" +
get_field("JobCompUser",document.config.job_comp_user) + "<br>" +
"#JobContainerType= <br>" +
get_field("JobAcctGatherFrequency",document.config.job_acct_gather_frequency) + "<br>" +
print_pair("JobAcctGatherType",get_radio_value(document.config.job_acct_gather_type)) + "<br>" +
"SlurmctldDebug=" + document.config.slurmctld_debug.value + "<br>" +
get_field("SlurmctldLogFile",document.config.slurmctld_logfile) + "<br>" +
"SlurmdDebug=" + document.config.slurmd_debug.value + "<br>" +
get_field("SlurmdLogFile",document.config.slurmd_logfile) + "<br>" +
"#SlurmSchedLogFile= <br>" +
"#SlurmSchedLogLevel= <br>" +
"#DebugFlags= <br>" +
"# <br>" +
"# <br>" +
"# POWER SAVE SUPPORT FOR IDLE NODES (optional) <br>" +
"#SuspendProgram= <br>" +
"#ResumeProgram= <br>" +
"#SuspendTimeout= <br>" +
"#ResumeTimeout= <br>" +
"#ResumeRate= <br>" +
"#SuspendExcNodes= <br>" +
"#SuspendExcParts= <br>" +
"#SuspendRate= <br>" +
"#SuspendTime= <br>" +
"# <br>" +
"# <br>" +
"# COMPUTE NODES <br>" +
// NodeName attributes use get_field2(), so blank optional fields are
// simply omitted from the line rather than emitted as placeholders.
"NodeName=" + document.config.node_name.value +
get_field2(" NodeAddr",document.config.node_addr) +
get_field2(" CPUs",document.config.procs) +
get_field2(" RealMemory",document.config.memory) +
get_field2(" Sockets",document.config.sockets) +
get_field2(" CoresPerSocket",document.config.cores_per_socket) +
get_field2(" ThreadsPerCore",document.config.threads_per_core) +
" State=UNKNOWN <br>" +
"PartitionName=" + document.config.partition_name.value +
" Nodes=ALL" +
" Default=YES"+
" MaxTime=" + document.config.max_time.value +
" State=UP"
// Earlier popup-based display path, retained commented out:
//scroll(0,0);
//var popup = document.getElementById('out_box');
//popup.innerHTML = "<a href='javascript:hide_box();'>close</a><br>";
//popup.innerHTML += "#BEGIN SLURM.CONF FILE<br><br>";
//popup.innerHTML += printme;
//popup.innerHTML += "<br><br>#END SLURM.CONF FILE<br>";
//popup.innerHTML += "<a href='javascript:hide_box();'>close</a>";
//popup.style.visibility = 'visible';
// OLD CODE
// Replace the entire page with the generated configuration text so the
// user can save it via the browser.
document.open();
document.write(printme);
document.close();
}
-->
</SCRIPT>
<!-- <div style='visibility:hidden;text-align:left;background:#ccc;border:1px solid black;position: absolute;left:100;z-index:1;padding:5;' id='out_box'></div> -->
</head>
<body>
<form name=config>
<H1>Slurm Version @SLURM_MAJOR@.@SLURM_MINOR@ Configuration Tool</H1>
<P>This form can be used to create a Slurm configuration file with
you controlling many of the important configuration parameters.</P>
<p>This is the full version of the Slurm configuration tool. This version
has all the configuration options to create a Slurm configuration file. There
is a simplified version of the Slurm configuration tool available at
<a href="configurator.easy.html">configurator.easy.html</a>.</p>
<P><B>This tool supports Slurm version @SLURM_MAJOR@.@SLURM_MINOR@ only.</B>
Configuration files for other versions of Slurm should be built
using the tool distributed with it in <i>doc/html/configurator.html</i>.
Some parameters will be set to default values, but you can
manually edit the resulting <I>slurm.conf</I> as desired
for greater flexibility. See <I>man slurm.conf</I> for more
details about the configuration parameters.</P>
<P>Note that while Slurm daemons create log files and other files as needed,
it treats the lack of parent directories as a fatal error.
This prevents the daemons from running if critical file systems are
not mounted and will minimize the risk of cold-starting (starting
without preserving jobs).</P>
<P>Note that this configuration file must be installed on all nodes
in your cluster.</P>
<P>After you have filled in the fields of interest, use the
"Submit" button on the bottom of the page to build the <I>slurm.conf</I>
file. It will appear on your web browser. Save the file in text format
as <I>slurm.conf</I> for use by Slurm.
<P>For more information about Slurm, see
<A HREF="https://slurm.schedmd.com/slurm.html">https://slurm.schedmd.com/slurm.html</A>
<h2>Cluster Name</h2>
<input type="text" name="cluster_name" value="cluster"> <b>ClusterName</b>:
The name of your cluster. Using different names for each of your clusters is
important when using a single database to record information from multiple
Slurm-managed clusters.
<H2>Control Machines</H2>
Define the hostname of the computer on which the Slurm controller and
optional backup controller will execute.
Hostname values should not be the fully qualified domain
name (e.g. use <I>tux</I> rather than <I>tux.abc.com</I>).
<P>
<input type="text" name="control_machine" value="linux0"> <B>SlurmctldHost</B>:
Primary Controller Hostname
<P>
<input type="text" name="backup_controller"> <B>BackupController</B>: Backup
Controller Hostname (optional)
<P>
<H2>Compute Machines</H2>
Define the machines on which user applications can run.
You can also specify addresses of these computers if desired
(defaults to their hostnames).
Only a few of the possible parameters associated with the nodes will
be set by this tool, but many others are available.
Executing the command <i>slurmd -C</i> on each compute node will print its
physical configuration (sockets, cores, real memory size, etc.), which
can be used in constructing the <i>slurm.conf</i> file.
All of the nodes will be placed into a single partition (or queue)
with global access. Many options are available to group nodes into
partitions with a wide variety of configuration parameters.
Manually edit the <i>slurm.conf</i> produced to exercise these options.
Node names and addresses may be specified using a numeric range specification.
<P>
<input type="text" name="node_name" value="linux[1-32]"> <B>NodeName</B>:
Compute nodes
<P>
<input type="text" name="node_addr"> <B>NodeAddr</B>: Compute node addresses
(optional)
<P>
<input type="text" name="partition_name" value="debug"> <B>PartitionName</B>:
Name of the one partition to be created
<P>
<input type="text" name="max_time" value="INFINITE"> <B>MaxTime</B>:
Maximum time limit of jobs in minutes or INFINITE
<P>
The following parameters describe a node's configuration.
Set a value for <B>CPUs</B>.
The other parameters are optional, but provide more control over scheduled resources:
<P>
<input type="text" name="procs" value="1"> <B>CPUs</B>: Count of processors
on each compute node.
If CPUs is omitted, it will be inferred from:
Sockets, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="sockets" value="">
<B>Sockets</B>:
Number of physical processor sockets/chips on the node.
If Sockets is omitted, it will be inferred from:
CPUs, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="cores_per_socket" value="">
<B>CoresPerSocket</B>:
Number of cores in a single physical processor socket.
The CoresPerSocket value describes physical cores, not
the logical number of processors per socket.
<P>
<input type="text" name="threads_per_core" value="">
<B>ThreadsPerCore</B>:
Number of logical threads in a single physical core.
<P>
<input type="text" name="memory" value=""> <B>RealMemory</B>: Amount
of real memory. This parameter is required when specifying Memory as a
consumable resource with the select/cons_tres plug-in. See below
under Resource Selection.
<P>
<H2>Slurm User</H2>
The Slurm controller (slurmctld) can run without elevated privileges,
so it is recommended that a user "slurm" be created for it. For testing
purposes any user name can be used.
<P>
<input type="text" name="slurm_user" value="slurm"> <B>SlurmUser</B>
<P>
<H2>Slurm Port Numbers</H2>
The Slurm controller (slurmctld) requires a unique port for communications
as do the Slurm compute node daemons (slurmd). If not set, slurm ports
are set by checking for an entry in <I>/etc/services</I> and if that
fails by using an internal default set at Slurm build time.
<P>
<input type="text" name="slurmctld_port" value="6817"> <B>SlurmctldPort</B>
<P>
<input type="text" name="slurmd_port" value="6818"> <B>SlurmdPort</B>
<P>
<H2>State Preservation</H2>
Define the location of a directory where the slurmctld daemon saves its state.
This should be a fully qualified pathname which can be read and written to
by the Slurm user on both the control machine and backup controller (if configured).
The location of a directory where slurmd saves state should also be defined.
This must be a unique directory on each compute server (local disk).
The use of a highly reliable file system (e.g. RAID) is recommended.
<P>
<input type="text" name="state_save_location" value="/var/spool/slurmctld">
<b>StateSaveLocation</b>: Slurmctld state save directory
<b>Must be writable by all SlurmctldHost nodes</b>
<P>
<input type="text" name="slurmd_spool_dir" value="/var/spool/slurmd">
<b>SlurmdSpoolDir</b>: Slurmd state save directory
<P>
Define when a non-responding (DOWN) node is returned to service.<BR>
Select one value for <B>ReturnToService</B>:<BR>
<input type="radio" name="return_to_service" value="0">
<B>0</B>: When explicitly restored to service by an administrator.<BR>
<input type="radio" name="return_to_service" value="1" checked>
<B>1</B>:Upon registration with a valid configuration only if it was set DOWN
due to being non-responsive.<BR>
<input type="radio" name="return_to_service" value="2">
<B>2</B>:Upon registration with a valid configuration.<BR>
<P>
<H2>Scheduling</H2>
Define the mechanism to be used for controlling job ordering.<BR>
Select one value for <B>SchedulerType</B>:<BR>
<input type="radio" name="sched_type" value="backfill" checked> <B>Backfill</B>:
FIFO with backfill<BR>
<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In
First-Out (FIFO)<BR>
<P>
<H2>Interconnect</H2>
Define the node interconnect used.<BR>
Select one value for <B>SwitchType</B>:<BR>
<input type="radio" name="switch_type" value="switch/hpe_slingshot"> <B>HPE
Slingshot</B>: HPE Slingshot proprietary interconnect<BR>
<input type="radio" name="switch_type" value="" checked> <B>None</B>: No special
interconnect support required<BR>
<P>
<H2>Default MPI Type</H2>
Specify the type of MPI to be used by default. Slurm will configure environment
variables accordingly. Users can over-ride this specification with an srun option.<BR>
Select one value for <B>MpiDefault</B>:<BR>
<input type="radio" name="mpi_default" value="mpi/pmi2"> <B>MPI-PMI2</B>
(For PMI2-supporting MPI implementations)<BR>
<input type="radio" name="mpi_default" value="mpi/pmix"> <B>MPI-PMIx</B>
(Exascale PMI implementation)<BR>
<input type="radio" name="mpi_default" value="" checked> <B>None</B>:
This works for most other MPI types.<BR>
<P>
<H2>Process Tracking</H2>
Define the algorithm used to identify which processes are associated with a
given job. This is used to signal, kill, and account for the processes associated
with a job step.<BR>
Select one value for <B>ProctrackType</B>:<BR>
<input type="radio" name="proctrack_type" value="cgroup" checked> <B>Cgroup</B>: Use
Linux <i>cgroup</i> to create a job container and track processes.
Build a <i>cgroup.conf</i> file as well<BR>
<input type="radio" name="proctrack_type" value="linuxproc"> <B>LinuxProc</B>: Use
parent process ID records, processes can escape from Slurm control<BR>
<input type="radio" name="proctrack_type" value="pgid"> <B>Pgid</B>: Use Unix
Process Group ID, processes changing their process group ID can escape from Slurm
control<BR>
<P>
<H2>Resource Selection</H2>
Define resource (node) selection algorithm to be used.<BR>
Select one value for <B>SelectType</B>:<BR>
<input type="radio" name="select_type" value="cons_tres" checked>
<b>cons_tres</b>: Allocate individual processors, memory, GPUs, and other
trackable resources<br />
<input type="radio" name="select_type" value="linear">
<B>Linear</B>: Node-base
resource allocation, does not manage individual processor allocation<BR>
<P>
<H2>Task Launch</H2>
Define a task launch plugin. This may be used to
provide resource management within a node (e.g. pinning
tasks to specific processors).
Select one value for <B>TaskPlugin</B>:<BR>
<input type="checkbox" name="task_plugin" value="task/none"> <B>None</B>: No task launch actions<BR>
<input type="checkbox" name="task_plugin" value="task/affinity" checked> <B>Affinity</B>:
CPU affinity support
(see srun man pages for the --cpu-bind, --mem-bind, and -E options)<BR>
<input type="checkbox" name="task_plugin" value="task/cgroup" checked> <B>Cgroup</B>:
Allocated resources constraints enforcement using Linux Control Groups
(see cgroup.conf man page)
<P>
<H2>Prolog and Epilog</H2>
<P>
<B>Prolog/Epilog</B>: Path that will be executed as root on every
node of a user's job before the job's tasks will be initiated there
and after that job has terminated.
These parameters are optional.
<DL>
<DT> <input type="text" name="prolog" value="" size=40> <B>Prolog</B>
<DT> <input type="text" name="epilog" value="" size=40> <B>Epilog</B>
</DL>
<P>
<B>SrunProlog/Epilog</B>: Fully qualified path to be executed by srun at
job step initiation and termination. These parameters may be overridden by
srun's --prolog and --epilog options
These parameters are optional.
<DL>
<DT> <input type="text" name="srun_prolog" value="" size=40> <B>SrunProlog</B>
<DT> <input type="text" name="srun_epilog" value="" size=40> <B>SrunEpilog</B>
</DL>
<P>
<B>TaskProlog/Epilog</B>: Fully qualified path to be executed as the user
before each task begins execution and after each task terminates.
These parameters are optional.
<DL>
<DT> <input type="text" name="task_prolog" value="" size=40> <B>TaskProlog</B>
<DT> <input type="text" name="task_epilog" value="" size=40> <B>TaskEpilog</B>
</DL>
<H2>Event Logging</H2>
Slurmctld and slurmd daemons can each be configured with different
levels of logging verbosity from 0 (quiet) to 7 (extremely verbose).
Each may also be configured to use debug files. Use fully qualified
pathnames for the files.
<P>
<input type="text" name="slurmctld_debug" value="info"> <b>SlurmctldDebug</b>
(default is info)
<P>
<input type="text" name="slurmctld_logfile" value="/var/log/slurmctld.log">
<b>SlurmctldLogFile</b> (if empty, log goes to syslog)
<P>
<input type="text" name="slurmd_debug" value="info"> <b>SlurmdDebug</b>
(default is info)
<P>
<input type="text" name="slurmd_logfile" value="/var/log/slurmd.log">
<b>SlurmdLogFile</b> (if empty, log goes to syslog. String "%h" in name gets
replaced with hostname)
<P>
<H2>Job Completion Logging</H2>
Define the job completion logging mechanism to be used. Defaults to None.
Select one value for <B>JobCompType</B>:<BR>
<input type="radio" name="job_comp_type" value="none" checked> <B>None</B>:
No job completion logging<BR>
<input type="radio" name="job_comp_type" value="elasticsearch"> <B>Elasticsearch</B>:
Write job completion info to an Elasticsearch server<BR>
<input type="radio" name="job_comp_type" value="filetxt"> <B>FileTxt</B>:
Write job completion status to a text file<BR>
<input type="radio" name="job_comp_type" value="kafka"> <b>Kafka</b>:
Write job completion info to a Kafka server<br>
<input type="radio" name="job_comp_type" value="lua"> <B>Lua</B>:
Use a script called jobcomp.lua to log job completion<BR>
<input type="radio" name="job_comp_type" value="script"> <B>Script</B>:
Use an arbitrary script to log job completion<BR>
<input type="radio" name="job_comp_type" value="mysql"> <B>MySQL</B>:
Write completion status to a MySQL or MariaDB database<BR>
<P>
<input type="text" name="job_comp_loc" value=""> <B>JobCompLoc</B>:
This is the location of the text file to be written to (if JobCompType=filetxt),
or the script to be run (if JobCompType=script), or the URL to the
Elasticsearch server (if JobCompType=elasticsearch), or file containing
librdkafka parameters (if JobCompType=jobcomp/kafka), database name
(for other values of JobCompType).
<p><b>Options below are for use with a database to specify where the database is running and how to connect to it</b><br>
<input type="text" name="job_comp_host" value=""> <B>JobCompHost</B>:
Host the database is running on for Job completion<br>
<input type="text" name="job_comp_port" value=""> <B>JobCompPort</B>:
Port the database server is listening on for Job completion<br>
<input type="text" name="job_comp_user" value=""> <B>JobCompUser</B>:
User we are to use to talk to the database for Job completion<br>
<input type="text" name="job_comp_params" value=""> <B>JobCompParams</B>:
Pass arbitrary text string to Job completion plugin<br>
<input type="text" name="job_comp_pass" value=""> <B>JobCompPass</B>:
Password we are to use to talk to the database for Job completion<br>
<P>
<H2>Job Accounting Gather</H2>
Slurm accounts for resource use per job. The system-specific information
gathered by polling is determined by system type.<BR>
Select one value for <B>JobAcctGatherType</B>:<BR>
<input type="radio" name="job_acct_gather_type" value="" checked> <B>None</B>: No
job accounting<BR>
<input type="radio" name="job_acct_gather_type" value="jobacct_gather/cgroup"> <b>cgroup</b>:
Specific Linux cgroup information gathered, use with Linux systems only<br>
<input type="radio" name="job_acct_gather_type" value="jobacct_gather/linux"> <B>Linux</B>: Specific
Linux process table information gathered, use with Linux systems only<BR>
<input type="text" name="job_acct_gather_frequency" value="30"> <B>JobAcctGatherFrequency</B>:
polling interval in seconds. Zero disables periodic sampling.<BR>
<P>
<H2>Job Accounting Storage</H2>
Used with the Job Accounting Gather Slurm can store the accounting information in many different fashions. Fill in your systems choice here<BR>
Select one value for <B>AccountingStorageType</B>:<BR>
<input type="radio" name="accounting_storage_type" value="" checked> <B>None</B>:
No job accounting storage<BR>
<input type="radio" name="accounting_storage_type" value="accounting_storage/slurmdbd"> <B>SlurmDBD</B>:
Write job accounting to SlurmDBD (database daemon) which can securely
save the data from many Slurm managed clusters into a common database<BR>
<p><b>Options below are for use with a database to specify where the database is running and how to connect to it</b><br>
<input type="text" name="accounting_storage_host" value=""> <B>AccountingStorageHost</B>:
Host the database is running on for Job Accounting<br>
<input type="text" name="accounting_storage_port" value=""> <B>AccountingStoragePort</B>:
Port the database server is listening on for Job Accounting<br>
<input type="text" name="acctng_store_flags" value=""> <B>AccountingStoreFlags</B>:
Comma separated list. Options are:<br>
'job_comment' - store the job comment field in the database;<br>
'job_env' - store a batch job's env in the database;<br>
'job_extra' - store a batch job's extra field in the database;<br>
'job_script' - store the job batch script in the database.<br>
<P>
<H2>Process ID Logging</H2>
Define the location into which we can record the daemon's process ID.
This is used to locate the appropriate daemon for signaling.
Specify the fully qualified pathname for the file.
<P>
<input type="text" name="slurmctld_pid_file" value="/var/run/slurmctld.pid">
<B>SlurmctldPidFile</B>
<P>
<input type="text" name="slurmd_pid_file" value="/var/run/slurmd.pid">
<B>SlurmdPidFile</B>
<P>
<H2>Timers</H2>
Slurm has a variety of timers to control when to consider a node DOWN,
when to purge job records, how long to give a job to gracefully terminate, etc.
<P>
<input type="text" name="slurmctld_timeout" value="120">
<B>SlurmctldTimeout</B>: How many seconds the backup controller waits before
becoming the active controller
<P>
<input type="text" name="slurmd_timeout" value="300">
<B>SlurmdTimeout</B>: How many seconds the Slurm controller waits for the slurmd
to respond to a request before considering the node DOWN
<P>
<input type="text" name="inactive_limit" value="0">
<B>InactiveLimit</B>: How many seconds the Slurm controller waits for srun
commands to respond before considering the job or job step inactive and
terminating it. A value of zero indicates unlimited wait
<P>
<input type="text" name="min_job_age" value="300">
<B>MinJobAge</B>: How many seconds the Slurm controller waits after a
job terminates before purging its record. A record of the job will
persist in job completion and/or accounting records indefinitely,
but will no longer be visible with the squeue command after purging
<P>
<input type="text" name="kill_wait" value="30">
<B>KillWait</B>: How many seconds a job is given to gracefully terminate
after reaching its time limit and being sent SIGTERM before sending
a SIGKILL
<P>
<input type="text" name="wait_time" value="0">
<B>WaitTime</B>: How many seconds after a job step's first task terminates
before terminating all remaining tasks. A value of zero indicates unlimited wait
<P>
<BR>
<BR>
<input type=button value="Submit" onClick="javascript:displayfile()">
<input type=reset value="Reset Form">
</form>
<hr>
<p>
<a href="disclaimer.html" target="_blank" class="privacy">Legal Notices</a><br>
Last modified 24 June 2025</p>
</body>
</html>