blob: a71849ab6a395c996d481744259e8db4aa816730 [file] [log] [blame]
<!--
Copyright (C) 2005-2007 The Regents of the University of California.
Copyright (C) 2008-2011 Lawrence Livermore National Security.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
Written by Morris Jette <jette1@llnl.gov> and Danny Auble <da@llnl.gov>
This file is part of Slurm, a resource management program.
For details, see <https://slurm.schedmd.com/>.
Slurm is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public License along
with Slurm; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-->
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>Slurm System Configuration Tool</title>
<SCRIPT type="text/javascript">
<!--
// Format one slurm.conf setting.
//   name  - parameter name
//   value - parameter value; any falsy value (e.g. "" or undefined) counts as unset
// Returns "name=value" when a value is present, otherwise the commented-out
// placeholder "#name=".
function print_pair(name,value)
{
	return value ? name + "=" + value : "#" + name + "="
}
// Format a setting taken from a single form control.
//   name - parameter name
//   form - object whose "value" property holds the setting (e.g. a text input)
// Returns "name=<value>" when the control's value is truthy, otherwise the
// commented-out placeholder "#name=".
function get_field(name,form)
{
	var text = form.value
	if (!text)
		return "#" + name + "="
	return name + "=" + text
}
// Like get_field(), but an unset control contributes nothing at all instead of
// a commented-out placeholder.
//   name - parameter name (callers may include a leading space as separator)
//   form - object whose "value" property holds the setting
// Returns "name=<value>" when the control's value is truthy, otherwise "".
function get_field2(name,form)
{
	return form.value ? name + "=" + form.value : ""
}
// Format a setting from a group of radio buttons, ignoring the first button.
//   name - parameter name
//   form - array-like list of buttons, each with "checked" and "value"
// Returns "name=<value>" for the first checked button at index 1 or later,
// otherwise the commented-out placeholder "#name=".
function get_radio_field_skipfirst(name,form)
{
	var idx = 1
	while (idx < form.length) {
		var button = form[idx]
		if (button.checked)
			return name + "=" + button.value
		idx++
	}
	return "#" + name + "="
}
// Look up the selected entry in a group of radio buttons.
//   form - array-like list of buttons, each with "checked" and "value"
// Returns the value of the first checked button, or undefined when no button
// is checked.
function get_radio_value(form)
{
	var idx = 0
	while (idx < form.length) {
		var button = form[idx]
		if (button.checked)
			return button.value
		idx++
	}
	return undefined
}
// Collect the values of all checked boxes in a checkbox group.
//   form - array-like list of boxes, each with "checked" and "value"
// Returns the checked values joined by "," in group order, or "" when no box
// is checked.
function get_checkbox_value(form)
{
	// Declared locally: the bare assignment used previously created an
	// implicit global (and is a ReferenceError in strict mode).
	var comma_list = ""
	for (var i=0; i < form.length; i++)
	{
		if (form[i].checked)
		{
			if (comma_list != "")
				comma_list = comma_list + ","
			comma_list = comma_list + form[i].value
		}
	}
	return comma_list
}
// When generating TaskPluginParam return the parameters that
// correspond to the checked task_plugin.
//
// Reads document.config.task_plugin (the task plugin controls) and
// document.config.task_plugin_param (a radio group of affinity parameters).
// Returns "TaskPluginParam=<value>" when the affinity plugin is checked and a
// parameter is selected, otherwise the commented-out "#TaskPluginParam=".
function get_task_plugin_param()
{
	var plugins = document.config.task_plugin
	var params = document.config.task_plugin_param

	// The form may not define a task_plugin_param group at all; without
	// this guard get_radio_value() would fail on an undefined argument.
	if (!params)
		return "#TaskPluginParam="

	for (var i = 0; i < plugins.length; i++) {
		var plugin = plugins[i]
		// The task_plugin controls carry the full plugin name
		// ("task/affinity"); the bare "affinity" spelling is still
		// accepted for forms that use short values.
		if (plugin.checked &&
		    (plugin.value == "task/affinity" || plugin.value == "affinity")) {
			var param = get_radio_value(params)
			// Avoid emitting "TaskPluginParam=undefined" when no
			// parameter radio button is selected.
			if (param)
				return "TaskPluginParam=" + param
			break
		}
	}
	return "#TaskPluginParam="
}
// Hide the pop-up output box (the element with id "out_box").
// Does nothing when the page has no such element, which is the case while the
// out_box <div> in this file is commented out.
function hide_box()
{
	var popup = document.getElementById('out_box');
	if (!popup)
		return;
	popup.style.visibility = 'hidden';
}
// Build the slurm.conf text from the "config" form and show it in the browser.
//
// Each entry of the "lines" array below is one line of the generated file.
// The lines are HTML-escaped, joined with "<br>" and written with
// document.open()/document.write()/document.close().
// Takes no arguments and returns nothing.
function displayfile()
{
	// Field values are free-form user text but the result is written out
	// as HTML, so escape the characters that would otherwise be parsed as
	// markup and corrupt the generated file.
	function escape_html(text)
	{
		return String(text)
			.replace(/&/g, "&amp;")
			.replace(/</g, "&lt;")
			.replace(/>/g, "&gt;")
	}

	var cfg = document.config

	// All optional node attributes share one NodeName line; get_field2()
	// contributes nothing for the ones left empty.
	var node_line = "NodeName=" + cfg.node_name.value +
		get_field2(" NodeAddr",cfg.node_addr) +
		get_field2(" CPUs",cfg.procs) +
		get_field2(" RealMemory",cfg.memory) +
		get_field2(" Sockets",cfg.sockets) +
		get_field2(" CoresPerSocket",cfg.cores_per_socket) +
		get_field2(" ThreadsPerCore",cfg.threads_per_core) +
		" State=UNKNOWN "

	var partition_line = "PartitionName=" + cfg.partition_name.value +
		" Nodes=ALL" +
		" Default=YES" +
		" MaxTime=" + cfg.max_time.value +
		" State=UP"

	var lines = [
		"# slurm.conf file generated by configurator easy.html.",
		"# Put this file on all nodes of your cluster.",
		"# See the slurm.conf man page for more information.",
		"#",
		get_field("ClusterName",cfg.cluster_name),
		"SlurmctldHost=" + cfg.control_machine.value,
		"# ",
		"#MailProg=/bin/mail ",
		print_pair("MpiDefault",get_radio_value(cfg.mpi_default)),
		"#MpiParams=ports=#-# ",
		"ProctrackType=proctrack/" + get_radio_value(cfg.proctrack_type),
		"ReturnToService=" + get_radio_value(cfg.return_to_service),
		"SlurmctldPidFile=" + cfg.slurmctld_pid_file.value,
		"#SlurmctldPort=6817 ",
		"SlurmdPidFile=" + cfg.slurmd_pid_file.value,
		"#SlurmdPort=6818 ",
		"SlurmdSpoolDir=" + cfg.slurmd_spool_dir.value,
		"SlurmUser=" + cfg.slurm_user.value,
		"#SlurmdUser=root ",
		"StateSaveLocation=" + cfg.state_save_location.value,
		print_pair("SwitchType",get_radio_value(cfg.switch_type)),
		// Commented out when no plugin box is checked, like the other
		// optional settings, instead of an empty "TaskPlugin=".
		print_pair("TaskPlugin",get_checkbox_value(cfg.task_plugin)),
		"# ",
		"# ",
		"# TIMERS ",
		"#KillWait=30 ",
		"#MinJobAge=300 ",
		"#SlurmctldTimeout=120 ",
		"#SlurmdTimeout=300 ",
		"# ",
		"# ",
		"# SCHEDULING ",
		"SchedulerType=sched/" + get_radio_value(cfg.sched_type),
		"SelectType=select/" + get_radio_value(cfg.select_type),
		"# ",
		"# ",
		"# LOGGING AND ACCOUNTING ",
		print_pair("AccountingStorageType",get_radio_value(cfg.accounting_storage_type)),
		"#JobAcctGatherFrequency=30 ",
		print_pair("JobAcctGatherType",get_radio_value(cfg.job_acct_gather_type)),
		"#SlurmctldDebug=info ",
		get_field("SlurmctldLogFile",cfg.slurmctld_logfile),
		"#SlurmdDebug=info ",
		get_field("SlurmdLogFile",cfg.slurmd_logfile),
		"# ",
		"# ",
		"# COMPUTE NODES ",
		node_line,
		partition_line
	]

	for (var i = 0; i < lines.length; i++)
		lines[i] = escape_html(lines[i])

	document.open();
	document.write(lines.join("<br>"));
	document.close();
}
-->
</SCRIPT>
<!-- <div style='visibility:hidden;text-align:left;background:#ccc;border:1px solid black;position: absolute;left:100;z-index:1;padding:5;' id='out_box'></div> -->
</head>
<body>
<form name=config>
<H1>Slurm Version @SLURM_MAJOR@.@SLURM_MINOR@ Configuration Tool - Easy Version</H1>
<P>This form can be used to create a Slurm configuration file with
you controlling many of the important configuration parameters.</P>
<p>This is a simplified version of the Slurm configuration tool. This version
has fewer options for creating a Slurm configuration file. The full version
of the Slurm configuration tool is available at
<a href="configurator.html">configurator.html</a>.</p>
<P><B>This tool supports Slurm version @SLURM_MAJOR@.@SLURM_MINOR@ only.</B>
Configuration files for other versions of Slurm should be built
using the tool distributed with it in <i>doc/html/configurator.html</i>.
Some parameters will be set to default values, but you can
manually edit the resulting <I>slurm.conf</I> as desired
for greater flexibility. See <I>man slurm.conf</I> for more
details about the configuration parameters.</P>
<P>Note that while Slurm daemons create log files and other files as needed,
it treats the lack of parent directories as a fatal error.
This prevents the daemons from running if critical file systems are
not mounted and will minimize the risk of cold-starting (starting
without preserving jobs).</P>
<P>Note that this configuration file must be installed on all nodes
in your cluster.</P>
<P>After you have filled in the fields of interest, use the
"Submit" button on the bottom of the page to build the <I>slurm.conf</I>
file. It will appear on your web browser. Save the file in text format
as <I>slurm.conf</I> for use by Slurm.
<P>For more information about Slurm, see
<A HREF="https://slurm.schedmd.com/slurm.html">https://slurm.schedmd.com/slurm.html</A>
<h2>Cluster Name</h2>
<input type="text" name="cluster_name" value="cluster"> <b>ClusterName</b>:
The name of your cluster. Using different names for each of your clusters is
important when using a single database to record information from multiple
Slurm-managed clusters.
<H2>Control Machines</H2>
Define the hostname of the computer on which the Slurm controller and
optional backup controller will execute.
Hostname values should not be the fully qualified domain
name (e.g. use <I>tux</I> rather than <I>tux.abc.com</I>).
<P>
<input type="text" name="control_machine" value="linux0"> <B>SlurmctldHost</B>:
Primary Controller Hostname
<P>
<H2>Compute Machines</H2>
Define the machines on which user applications can run.
You can also specify addresses of these computers if desired
(defaults to their hostnames).
Only a few of the possible parameters associated with the nodes will
be set by this tool, but many others are available.
Executing the command <i>slurmd -C</i> on each compute node will print its
physical configuration (sockets, cores, real memory size, etc.), which
can be used in constructing the <i>slurm.conf</i> file.
All of the nodes will be placed into a single partition (or queue)
with global access. Many options are available to group nodes into
partitions with a wide variety of configuration parameters.
Manually edit the <i>slurm.conf</i> produced to exercise these options.
Node names and addresses may be specified using a numeric range specification.
<P>
<input type="text" name="node_name" value="linux[1-32]"> <B>NodeName</B>:
Compute nodes
<P>
<input type="text" name="node_addr"> <B>NodeAddr</B>: Compute node addresses
(optional)
<P>
<input type="text" name="partition_name" value="debug"> <B>PartitionName</B>:
Name of the one partition to be created
<P>
<input type="text" name="max_time" value="INFINITE"> <B>MaxTime</B>:
Maximum time limit of jobs in minutes or INFINITE
<P>
The following parameters describe a node's configuration.
Set a value for <B>CPUs</B>.
The other parameters are optional, but provide more control over scheduled resources:
<P>
<input type="text" name="procs" value="1"> <B>CPUs</B>: Count of processors
on each compute node.
If CPUs is omitted, it will be inferred from:
Sockets, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="sockets" value="">
<B>Sockets</B>:
Number of physical processor sockets/chips on the node.
If Sockets is omitted, it will be inferred from:
CPUs, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="cores_per_socket" value="">
<B>CoresPerSocket</B>:
Number of cores in a single physical processor socket.
The CoresPerSocket value describes physical cores, not
the logical number of processors per socket.
<P>
<input type="text" name="threads_per_core" value="">
<B>ThreadsPerCore</B>:
Number of logical threads in a single physical core.
<P>
<input type="text" name="memory" value=""> <B>RealMemory</B>: Amount
of real memory. This parameter is required when specifying Memory as a
consumable resource with the select/cons_tres plug-in. See below
under Resource Selection.
<P>
<H2>Slurm User</H2>
The Slurm controller (slurmctld) can run without elevated privileges,
so it is recommended that a user "slurm" be created for it. For testing
purposes any user name can be used.
<P>
<input type="text" name="slurm_user" value="slurm"> <B>SlurmUser</B>
<P>
<H2>State Preservation</H2>
Define the location of a directory where the slurmctld daemon saves its state.
This should be a fully qualified pathname which can be read and written to
by the Slurm user on both the control machine and backup controller (if configured).
The location of a directory where slurmd saves state should also be defined.
This must be a unique directory on each compute server (local disk).
The use of a highly reliable file system (e.g. RAID) is recommended.
<P>
<input type="text" name="state_save_location" value="/var/spool/slurmctld">
<b>StateSaveLocation</b>: Slurmctld state save directory
<P>
<input type="text" name="slurmd_spool_dir" value="/var/spool/slurmd">
<b>SlurmdSpoolDir</b>: Slurmd state save directory
<P>
Define when a non-responding (DOWN) node is returned to service.<BR>
Select one value for <B>ReturnToService</B>:<BR>
<input type="radio" name="return_to_service" value="0">
<B>0</B>: When explicitly restored to service by an administrator.<BR>
<input type="radio" name="return_to_service" value="1" checked>
<B>1</B>:Upon registration with a valid configuration only if it was set DOWN
due to being non-responsive.<BR>
<input type="radio" name="return_to_service" value="2">
<B>2</B>:Upon registration with a valid configuration.<BR>
<P>
<H2>Scheduling</H2>
Define the mechanism to be used for controlling job ordering.<BR>
Select one value for <B>SchedulerType</B>:<BR>
<input type="radio" name="sched_type" value="backfill" checked> <B>Backfill</B>:
FIFO with backfill<BR>
<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In
First-Out (FIFO)<BR>
<P>
<H2>Interconnect</H2>
Define the node interconnect used.<BR>
Select one value for <B>SwitchType</B>:<BR>
<input type="radio" name="switch_type" value="switch/hpe_slingshot"> <B>HPE
Slingshot</B>: HPE Slingshot proprietary interconnect<BR>
<input type="radio" name="switch_type" value="" checked> <B>None</B>: No special
handling required (InfiniBand, Myrinet, Ethernet, etc.)<BR>
<P>
<H2>Default MPI Type</H2>
Specify the type of MPI to be used by default. Slurm will configure environment
variables accordingly. Users can over-ride this specification with an srun option.<BR>
Select one value for <B>MpiDefault</B>:<BR>
<input type="radio" name="mpi_default" value="mpi/pmi2"> <B>MPI-PMI2</B>
(For PMI2-supporting MPI implementations)<BR>
<input type="radio" name="mpi_default" value="mpi/pmix"> <B>MPI-PMIx</B>
(Exascale PMI implementation)<BR>
<input type="radio" name="mpi_default" value="" checked> <B>None</B>:
This works for most other MPI types.<BR>
<P>
<H2>Process Tracking</H2>
Define the algorithm used to identify which processes are associated with a
given job. This is used to signal, kill, and account for the processes associated
with a job step.<BR>
Select one value for <B>ProctrackType</B>:<BR>
<input type="radio" name="proctrack_type" value="cgroup" checked> <B>Cgroup</B>: Use
Linux <i>cgroup</i> to create a job container and track processes.
Build a <i>cgroup.conf</i> file as well<BR>
<input type="radio" name="proctrack_type" value="linuxproc"> <B>LinuxProc</B>: Use
parent process ID records, processes can escape from Slurm control<BR>
<input type="radio" name="proctrack_type" value="pgid"> <B>Pgid</B>: Use Unix
Process Group ID, processes changing their process group ID can escape from Slurm
control<BR>
<P>
<H2>Resource Selection</H2>
Define resource (node) selection algorithm to be used.<BR>
Select one value for <B>SelectType</B>:<BR>
<input type="radio" name="select_type" value="cons_tres" checked>
<b>cons_tres</b>: Allocate individual processors, memory, GPUs, and other
trackable resources<br />
<input type="radio" name="select_type" value="linear">
<B>Linear</B>: Node-based
resource allocation, does not manage individual processor allocation<BR>
<P>
<H2>Task Launch</H2>
Define a task launch plugin. This may be used to
provide resource management within a node (e.g. pinning
tasks to specific processors).
Select one or more values for <B>TaskPlugin</B>:<BR>
<input type="checkbox" name="task_plugin" value="task/none"> <B>None</B>: No task launch actions<BR>
<input type="checkbox" name="task_plugin" value="task/affinity" checked> <B>Affinity</B>:
CPU affinity support
(see srun man pages for the --cpu-bind, --mem-bind, and -E options)<BR>
<input type="checkbox" name="task_plugin" value="task/cgroup" checked> <B>Cgroup</B>:
Allocated resources constraints enforcement using Linux Control Groups
(see cgroup.conf man page)
<P>
<H2>Event Logging</H2>
Slurmctld and slurmd daemons can each be configured with different
levels of logging verbosity from 0 (quiet) to 7 (extremely verbose).
Each may also be configured to use debug files. Use fully qualified
pathnames for the files.
<P>
<input type="text" name="slurmctld_logfile" value="/var/log/slurmctld.log">
<b>SlurmctldLogFile</b> (if empty, log goes to syslog)
<P>
<input type="text" name="slurmd_logfile" value="/var/log/slurmd.log">
<b>SlurmdLogFile</b> (if empty, log goes to syslog. String "%h" in name gets
replaced with hostname)
<P>
<H2>Job Accounting Gather</H2>
Slurm accounts for resource use per job. The system specifics that can be polled
are determined by system type.<BR>
Select one value for <B>JobAcctGatherType</B>:<BR>
<input type="radio" name="job_acct_gather_type" value="" checked> <B>None</B>: No
job accounting<BR>
<input type="radio" name="job_acct_gather_type" value="jobacct_gather/cgroup"> <b>cgroup</b>:
Specific Linux cgroup information gathered, use with Linux systems only<br>
<input type="radio" name="job_acct_gather_type" value="jobacct_gather/linux"> <B>Linux</B>: Specific
Linux process table information gathered, use with Linux systems only<BR>
<H2>Job Accounting Storage</H2>
Used with Job Accounting Gather, Slurm can store the accounting information in many different fashions. Fill in your system's choice here.<BR>
Select one value for <B>AccountingStorageType</B>:<BR>
<input type="radio" name="accounting_storage_type" value="" checked> <B>None</B>:
No job accounting storage<BR>
<input type="radio" name="accounting_storage_type" value="accounting_storage/slurmdbd"> <B>SlurmDBD</B>:
Write job accounting to SlurmDBD (database daemon) which can securely
save the data from many Slurm managed clusters into a common database<BR>
<P>
<H2>Process ID Logging</H2>
Define the location into which we can record the daemon's process ID.
This is used to locate the appropriate daemon for signaling.
Specify the fully qualified pathname for the file.
<P>
<input type="text" name="slurmctld_pid_file" value="/var/run/slurmctld.pid">
<B>SlurmctldPidFile</B>
<P>
<input type="text" name="slurmd_pid_file" value="/var/run/slurmd.pid">
<B>SlurmdPidFile</B>
<P>
<BR>
<BR>
<input type=button value="Submit" onClick="javascript:displayfile()">
<input type=reset value="Reset Form">
</form>
<hr>
<p>
<a href="disclaimer.html" target="_blank" class="privacy">Legal Notices</a><br>
Last modified 13 March 2024</p>
</body>
</html>