blob: c904a3d5c2532425027579f85af69d5b12926ee6 [file] [log] [blame] [edit]
<!--
Copyright (C) 2005-2007 The Regents of the University of California.
Copyright (C) 2008-2011 Lawrence Livermore National Security.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
Written by Morris Jette <jette1@llnl.gov> and Danny Auble <da@llnl.gov>
This file is part of SLURM, a resource management program.
For details, see <http://www.schedmd.com/slurmdocs/>.
SLURM is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public License along
with SLURM; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-->
<HTML>
<HEAD><TITLE>SLURM System Configuration Tool</TITLE>
<SCRIPT type="text/javascript">
<!--
// Format one slurm.conf line for a text input.
//   name: configuration keyword
//   form: form element whose .value is read
// Returns "name=value" when the element has a non-empty value,
// otherwise the commented-out placeholder "#name=".
function get_field(name,form)
{
    var entered = form.value;
    return entered ? name + "=" + entered : "#" + name + "=";
}
// Format an optional "name=value" fragment for a text input.
//   name: configuration keyword (callers include any leading space)
//   form: form element whose .value is read
// Returns "name=value" when the element has a non-empty value,
// otherwise an empty string so the fragment is omitted entirely.
function get_field2(name,form)
{
    var entered = form.value;
    return entered ? name + "=" + entered : "";
}
// Format a slurm.conf line from a radio group, ignoring its first button.
//   name: configuration keyword
//   form: indexable radio group (entries expose .checked and .value)
// Scanning starts at index 1; the first checked button found yields
// "name=value". If none of those buttons is checked (including when only
// the first one is), the commented-out placeholder "#name=" is returned.
function get_radio_field_skipfirst(name,form)
{
    var idx = 1;
    while (idx < form.length) {
        var button = form[idx];
        if (button.checked) {
            return name + "=" + button.value;
        }
        idx += 1;
    }
    return "#" + name + "=";
}
// Return the value of the first checked button in a radio group.
//   form: indexable radio group (entries expose .checked and .value)
// Returns undefined when no button is checked.
function get_radio_value(form)
{
    var idx = 0;
    while (idx < form.length) {
        var button = form[idx];
        if (button.checked) {
            return button.value;
        }
        idx += 1;
    }
    return undefined;
}
// Keep the select_type radio group in step with a SelectTypeParameters
// choice: when a parameter button is clicked, click the select_type
// button that the parameter belongs to.
//   form: the clicked element; only its .name is examined
// Names that match no known parameter group are ignored.
function set_select_type(form)
{
    // Position in this list == index of the matching select_type button:
    // 0 cons_res, 1 linear, 2 bluegene, 3 cray
    var param_groups = ["cons_res_params", "linear_params",
                        "bluegene_params", "cray_params"];
    for (var slot = 0; slot < param_groups.length; slot += 1) {
        if (form.name == param_groups[slot]) {
            document.config.select_type[slot].click();
            return;
        }
    }
}
// Produce the SelectTypeParameters line that goes with whichever
// select_type button is checked.
// Button 0 (cons_res) takes its value from the cons_res_params group via
// get_radio_field_skipfirst; buttons 1-3 (linear, bluegene, cray) always
// give the commented-out placeholder. Returns undefined if none of the
// four buttons is checked.
function get_select_type_params()
{
    var types = document.config.select_type;
    if (types[0].checked) { // cons_res
        return get_radio_field_skipfirst("SelectTypeParameters",
                                         document.config.cons_res_params);
    }
    // linear, bluegene, cray: no parameters are emitted
    for (var slot = 1; slot <= 3; slot += 1) {
        if (types[slot].checked) {
            return "#SelectTypeParameters=";
        }
    }
    return undefined;
}
// Produce the TaskPluginParam line that goes with the checked task_plugin.
// Only a checked "affinity" button yields a value, taken from the
// task_plugin_param radio group; anything else gives the commented-out
// placeholder.
// NOTE(review): no task_plugin_param input is visible in this form and
// nothing visible calls this function - confirm before relying on it.
function get_task_plugin_param()
{
    var plugins = document.config.task_plugin;
    var idx = 0;
    while (idx < plugins.length) {
        var choice = plugins[idx];
        if (choice.checked && choice.value == "affinity") {
            return "TaskPluginParam=" +
                   get_radio_value(document.config.task_plugin_param);
        }
        idx += 1;
    }
    return "#TaskPluginParam=";
}
// Hide the 'out_box' popup element.
// Does nothing when the page has no element with that id, instead of
// failing on a null lookup result.
function hide_box()
{
    var popup = document.getElementById('out_box');
    if (!popup) {
        return;
    }
    popup.style.visibility = 'hidden';
}
// Build the slurm.conf text from the "config" form and replace the current
// page with it (document.open / write / close). Lines are separated with
// "<br>" because the result is written out as HTML.
//
// Free-text values typed by the user are HTML-escaped before being written
// so that characters such as "<", ">" and "&" show up literally in the
// generated file instead of being interpreted as markup. Radio-button
// values come from this page's own markup and are written as-is.
//
// The SelectTypeParameters line produced by get_select_type_params() is
// emitted right after SelectType so the cons_res parameter chosen in the
// form is not lost.
function displayfile()
{
    // Escape the characters that are significant in HTML text content.
    function esc(text)
    {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;");
    }

    var cfg = document.config;
    var printme = "# slurm.conf file generated by configurator easy.html.<br>" +
        "# Put this file on all nodes of your cluster.<br>" +
        "# See the slurm.conf man page for more information.<br>" +
        "#<br>" +
        "ControlMachine=" + esc(cfg.control_machine.value) + "<br>" +
        esc(get_field("ControlAddr",cfg.control_addr)) + "<br>" +
        "# <br>" +
        "#MailProg=/bin/mail <br>" +
        "MpiDefault=" + get_radio_value(cfg.mpi_default) + "<br>" +
        "#MpiParams=ports=#-# <br>" +
        "ProctrackType=proctrack/" + get_radio_value(cfg.proctrack_type) + "<br>" +
        "ReturnToService=" + get_radio_value(cfg.return_to_service) + "<br>" +
        "SlurmctldPidFile=" + esc(cfg.slurmctld_pid_file.value) + "<br>" +
        "#SlurmctldPort=6817 <br>" +
        "SlurmdPidFile=" + esc(cfg.slurmd_pid_file.value) + "<br>" +
        "#SlurmdPort=6818 <br>" +
        "SlurmdSpoolDir=" + esc(cfg.slurmd_spool_dir.value) + "<br>" +
        "SlurmUser=" + esc(cfg.slurm_user.value) + "<br>" +
        "#SlurmdUser=root <br>" +
        "StateSaveLocation=" + esc(cfg.state_save_location.value) + "<br>" +
        "SwitchType=switch/" + get_radio_value(cfg.switch_type) + "<br>" +
        "TaskPlugin=task/" + get_radio_value(cfg.task_plugin) + "<br>" +
        "# <br>" +
        "# <br>" +
        "# TIMERS <br>" +
        "#KillWait=30 <br>" +
        "#MinJobAge=300 <br>" +
        "#SlurmctldTimeout=120 <br>" +
        "#SlurmdTimeout=300 <br>" +
        "# <br>" +
        "# <br>" +
        "# SCHEDULING <br>" +
        "FastSchedule=" + get_radio_value(cfg.fast_schedule) + "<br>" +
        "SchedulerType=sched/" + get_radio_value(cfg.sched_type) + "<br>" +
        "#SchedulerPort=7321 <br>" +
        "SelectType=select/" + get_radio_value(cfg.select_type) + "<br>" +
        get_select_type_params() + "<br>" +
        "# <br>" +
        "# <br>" +
        "# LOGGING AND ACCOUNTING <br>" +
        "AccountingStorageType=accounting_storage/" + get_radio_value(cfg.accounting_storage_type) + "<br>" +
        esc(get_field("ClusterName",cfg.cluster_name)) + "<br>" +
        "#JobAcctGatherFrequency=30 <br>" +
        "JobAcctGatherType=jobacct_gather/" + get_radio_value(cfg.job_acct_gather_type) + "<br>" +
        "#SlurmctldDebug=3 <br>" +
        esc(get_field("SlurmctldLogFile",cfg.slurmctld_logfile)) + "<br>" +
        "#SlurmdDebug=3 <br>" +
        esc(get_field("SlurmdLogFile",cfg.slurmd_logfile)) + "<br>" +
        "# <br>" +
        "# <br>" +
        "# COMPUTE NODES <br>" +
        "NodeName=" + esc(cfg.node_name.value) +
        esc(get_field2(" NodeAddr",cfg.node_addr)) +
        esc(get_field2(" CPUs",cfg.procs)) +
        esc(get_field2(" RealMemory",cfg.memory)) +
        esc(get_field2(" Sockets",cfg.sockets)) +
        esc(get_field2(" CoresPerSocket",cfg.cores_per_socket)) +
        esc(get_field2(" ThreadsPerCore",cfg.threads_per_core)) +
        " State=UNKNOWN <br>" +
        "PartitionName=" + esc(cfg.partition_name.value) +
        " Nodes=" + esc(cfg.node_name.value) +
        " Default=YES"+
        " MaxTime=" + esc(cfg.max_time.value) +
        " State=UP";

    // The generated text replaces the whole page. (A popup-based display
    // using the 'out_box' element was sketched previously but is disabled.)
    document.open();
    document.write(printme);
    document.close();
}
-->
</SCRIPT>
<!-- <div style='visibility:hidden;text-align:left;background:#ccc;border:1px solid black;position: absolute;left:100;z-index:1;padding:5;' id='out_box'></div> -->
</HEAD>
<BODY>
<FORM name=config>
<H1>SLURM Version @SLURM_MAJOR@.@SLURM_MINOR@ Configuration Tool</H1>
<P>This form can be used to create a SLURM configuration file with
you controlling many of the important configuration parameters.</P>
<p>This is a simplified version of the SLURM configuration tool. This version
has fewer options for creating a SLURM configuration file. The full version
of the SLURM configuration tool is available at
<a href="configurator.html">configurator.html</a>.</p>
<P><B>This tool supports SLURM version @SLURM_MAJOR@.@SLURM_MINOR@ only.</B>
Configuration files for other versions of SLURM should be built
using the tool distributed with it in <i>doc/html/configurator.html</i>.
Some parameters will be set to default values, but you can
manually edit the resulting <I>slurm.conf</I> as desired
for greater flexibility. See <I>man slurm.conf</I> for more
details about the configuration parameters.</P>
<P>Note that while SLURM daemons create log files and other files as needed,
it treats the lack of parent directories as a fatal error.
This prevents the daemons from running if critical file systems are
not mounted and will minimize the risk of cold-starting (starting
without preserving jobs).</P>
<P>Note that this configuration file must be installed on all nodes
in your cluster.</P>
<P>After you have filled in the fields of interest, use the
"Submit" button on the bottom of the page to build the <I>slurm.conf</I>
file. It will appear on your web browser. Save the file in text format
as <I>slurm.conf</I> for use by SLURM.
<P>For more information about SLURM, see
<A HREF="http://www.schedmd.com/slurmdocs/slurm.html">http://www.schedmd.com/slurmdocs/slurm.html</A>
<H2>Control Machines</H2>
Define the hostname of the computer on which the SLURM controller and
optional backup controller will execute. You can also specify addresses
of these computers if desired (defaults to their hostnames).
The IP addresses can be either numeric IP addresses or names.
Hostname values should not be the fully qualified domain
name (e.g. use <I>tux</I> rather than <I>tux.abc.com</I>).
<P>
<input type="text" name="control_machine" value="linux0"> <B>ControlMachine</B>:
Master Controller Hostname
<P>
<input type="text" name="control_addr"> <B>ControlAddr</B>: Master Controller
Address (optional)
<P>
<H2>Compute Machines</H2>
Define the machines on which user applications can run.
You can also specify addresses of these computers if desired
(defaults to their hostnames).
Only a few of the possible parameters associated with the nodes will
be set by this tool, but many others are available.
Executing the command <i>slurmd -C</i> on each compute node will print its
physical configuration (sockets, cores, real memory size, etc.), which
can be used in constructing the <i>slurm.conf</i> file.
All of the nodes will be placed into a single partition (or queue)
with global access. Many options are available to group nodes into
partitions with a wide variety of configuration parameters.
Manually edit the <i>slurm.conf</i> produced to exercise these options.
Node names and addresses may be specified using a numeric range specification.
<P>
<input type="text" name="node_name" value="linux[1-32]"> <B>NodeName</B>:
Compute nodes
<P>
<input type="text" name="node_addr"> <B>NodeAddr</B>: Compute node addresses
(optional)
<P>
<input type="text" name="partition_name" value="debug"> <B>PartitionName</B>:
Name of the one partition to be created
<P>
<input type="text" name="max_time" value="INFINITE"> <B>MaxTime</B>:
Maximum time limit of jobs in minutes or INFINITE
<P>
The following parameters describe a node's configuration.
Set a value for <B>CPUs</B>.
The other parameters are optional, but provide more control over scheduled resources:
<P>
<input type="text" name="procs" value="1"> <B>CPUs</B>: Count of processors
on each compute node.
If CPUs is omitted, it will be inferred from:
Sockets, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="sockets" value="">
<B>Sockets</B>:
Number of physical processor sockets/chips on the node.
If Sockets is omitted, it will be inferred from:
CPUs, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="cores_per_socket" value="">
<B>CoresPerSocket</B>:
Number of cores in a single physical processor socket.
The CoresPerSocket value describes physical cores, not
the logical number of processors per socket.
<P>
<input type="text" name="threads_per_core" value="">
<B>ThreadsPerCore</B>:
Number of logical threads in a single physical core.
<P>
<input type="text" name="memory" value=""> <B>RealMemory</B>: Amount
of real memory. This parameter is required when specifying Memory as a
consumable resource with the select/cons_res plug-in. See below
under Resource Selection.
<P>
<H2>SLURM User</H2>
The SLURM controller (slurmctld) can run without elevated privileges,
so it is recommended that a user "slurm" be created for it. For testing
purposes any user name can be used.
<P>
<input type="text" name="slurm_user" value="slurm"> <B>SlurmUser</B>
<P>
<H2>State Preservation</H2>
Define the location of a directory where the slurmctld daemon saves its state.
This should be a fully qualified pathname which can be read and written to
by the SLURM user on both the control machine and backup controller (if configured).
The location of a directory where slurmd saves state should also be defined.
This must be a unique directory on each compute server (local disk).
The use of a highly reliable file system (e.g. RAID) is recommended.
<P>
<input type="text" name="state_save_location" value="/var/spool"> <B>StateSaveLocation</B>:
Slurmctld state save directory
<P>
<input type="text" name="slurmd_spool_dir" value="/var/spool/slurmd"> <B>SlurmdSpoolDir</B>:
Slurmd state save directory
<P>
Define when a non-responding (DOWN) node is returned to service.<BR>
Select one value for <B>ReturnToService</B>:<BR>
<input type="radio" name="return_to_service" value="0">
<B>0</B>: When explicitly restored to service by an administrator.<BR>
<input type="radio" name="return_to_service" value="1" checked>
<B>1</B>: Automatically, when slurmd daemon registers with valid configuration<BR>
<P>
<H2>Scheduling</H2>
Define the mechanism to be used for controlling job ordering.<BR>
Select one value for <B>SchedulerType</B>:<BR>
<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In
First-Out (FIFO)<BR>
<input type="radio" name="sched_type" value="backfill" checked> <B>Backfill</B>:
FIFO with backfill<BR>
<P>
Define what node configuration (sockets, cores, memory, etc.) should be used.
Using values defined in the configuration file will provide faster scheduling.<BR>
Select one value for <B>FastSchedule</B>:<BR>
<input type="radio" name="fast_schedule" value="1" checked>
<B>1</B>: Use node configuration values defined in configuration file<BR>
<input type="radio" name="fast_schedule" value="0">
<B>0</B>: Use node configuration values actually found on each node
(if configured with gang scheduling or allocation of individual
processors to jobs rather than only whole node allocations, the processor
count on the node should match the configured value to avoid having extra
processors left idle)
<P>
<H2>Interconnect</H2>
Define the node interconnect used.<BR>
Select one value for <B>SwitchType</B>:<BR>
<input type="radio" name="switch_type" value="elan"> <B>Elan</B>: Quadrics Elan3 or Elan4<BR>
<input type="radio" name="switch_type" value="federation"> <B>Federation</B>: IBM
Federation Switch<BR>
<input type="radio" name="switch_type" value="none" checked> <B>None</B>: No special
handling required (InfiniBand, Myrinet, Ethernet, etc.)<BR>
<P>
<H2>Default MPI Type</H2>
Specify the type of MPI to be used by default. SLURM will configure environment
variables accordingly. Users can over-ride this specification with an srun option.<BR>
Select one value for <B>MpiDefault</B>:<BR>
<input type="radio" name="mpi_default" value="mpichgm"> <B>MPICH-GM</B><BR>
<input type="radio" name="mpi_default" value="mpichmx"> <B>MPICH-MX</B><BR>
<input type="radio" name="mpi_default" value="mpich1_p4"> <B>MPICH1-P4</B><BR>
<input type="radio" name="mpi_default" value="mpich1_shmem"> <B>MPICH1-SHMEM</B>:
This also works for MVAPICH-SHMEM.<BR>
<input type="radio" name="mpi_default" value="pmi2"> <B>MPI-PMI2</B>
(For MPI2 and MVAPICH2)<BR>
<input type="radio" name="mpi_default" value="mvapich"> <B>MVAPICH</B><BR>
<input type="radio" name="mpi_default" value="none" checked> <B>None</B>:
This works for most other MPI types including MPICH2, LAM MPI and Open MPI.<BR>
<P>
<H2>Process Tracking</H2>
Define the algorithm used to identify which processes are associated with a
given job. This is used to signal, kill, and account for the processes associated
with a job step.<BR>
Select one value for <B>ProctrackType</B>:<BR>
<input type="radio" name="proctrack_type" value="aix"> <B>AIX</B>: Use AIX kernel
extension, recommended for AIX systems<BR>
<input type="radio" name="proctrack_type" value="cgroup"> <B>Cgroup</B>: Use
Linux <i>cgroup</i> to create a job container and track processes.
Build a <i>cgroup.conf</i> file as well<BR>
<input type="radio" name="proctrack_type" value="pgid" checked> <B>Pgid</B>: Use Unix
Process Group ID, processes changing their process group ID can escape from SLURM
control<BR>
<input type="radio" name="proctrack_type" value="linuxproc"> <B>LinuxProc</B>: Use
parent process ID records, required for MPICH-GM use, processes can escape
from SLURM control<BR>
<input type="radio" name="proctrack_type" value="rms"> <B>RMS</B>: Use Quadrics
kernel infrastructure, recommended for systems where this is available<BR>
<input type="radio" name="proctrack_type" value="sgi_job"> <B>SGI's PAGG
module</B>: Use <A HREF="http://oss.sgi.com/projects/pagg/">SGI's Process
Aggregates (PAGG) kernel module</A>, recommended where available<BR>
<P>
<H2>Resource Selection</H2>
Define resource (node) selection algorithm to be used.<BR>
Select one value for <B>SelectType</B>:<BR>
<input type="radio" name="select_type" value="cons_res">
<B>Cons_res</B>: Allocate individual processors and memory<BR>
<DL>
<DL>
<DT><B>SelectTypeParameters</B> (As used by <I>SelectType=Cons_res</I> only):
<DD> Note: The -E extension for sockets, cores, and threads
are ignored within the node allocation mechanism
when CR_CPU or CR_CPU_MEMORY is selected.
They are considered to compute the total number of
tasks when -n is not specified
<DD> Note: CR_MEMORY assumes MaxShare value of one or higher
<DT> <input type="radio" name="cons_res_params" value="CR_CPU" checked
onClick="javascript:set_select_type(this, 'cons_res')">
<B>CR_CPU</B>: (default)
CPUs as consumable resources.
<DD> No notion of sockets, cores, or threads.
On a multi-core system, cores will be considered CPUs.
On a multi-core/hyperthread system, threads will be considered CPUs.
On a single-core systems CPUs are CPUs. ;-)
<DT> <input type="radio" name="cons_res_params" value="CR_Socket"
onClick="javascript:set_select_type(this)">
<B>CR_Socket</B>: Sockets as a consumable resource.
<DT> <input type="radio" name="cons_res_params" value="CR_Core"
onClick="javascript:set_select_type(this)">
<B>CR_Core</B>: Cores as a consumable resource.
<DT> <input type="radio" name="cons_res_params" value="CR_Memory"
onClick="javascript:set_select_type(this)">
<B>CR_Memory</B>: Memory as a consumable resource.
<DD> Note: CR_Memory assumes MaxShare value of one or higher
<DT> <input type="radio" name="cons_res_params" value="CR_CPU_Memory"
onClick="javascript:set_select_type(this)">
<B>CR_CPU_Memory</B>:
CPU and Memory as consumable resources.
<DT> <input type="radio" name="cons_res_params" value="CR_Socket_Memory"
onClick="javascript:set_select_type(this)">
<B>CR_Socket_Memory</B>:
Socket and Memory as consumable resources.
<DT> <input type="radio" name="cons_res_params" value="CR_Core_Memory"
onClick="javascript:set_select_type(this)">
<B>CR_Core_Memory</B>:
Core and Memory as consumable resources.
</DL>
</DL>
<input type="radio" name="select_type" value="linear" checked>
<B>Linear</B>: Node-based
resource allocation, does not manage individual processor allocation<BR>
<input type="radio" name="select_type" value="bluegene">
<B>BlueGene</B>: For IBM Blue Gene systems only<BR>
<input type="radio" name="select_type" value="cray">
<B>Cray</B>: Cray systems running ALPS only<BR>
<P>
<H2>Task Launch</H2>
Define a task launch plugin. This may be used to
provide resource management within a node (e.g. pinning
tasks to specific processors).
Select one value for <B>TaskPlugin</B>:<BR>
<input type="radio" name="task_plugin" value="none" checked> <B>None</B>: No task launch actions<BR>
<input type="radio" name="task_plugin" value="affinity"> <B>Affinity</B>:
CPU affinity support
(see srun man pages for the --cpu_bind, --mem_bind, and -E options)<BR>
<input type="radio" name="task_plugin" value="cgroup"> <B>Cgroup</B>:
Allocated resources constraints enforcement using Linux Control Groups
(see cgroup.conf man page)
<P>
<H2>Event Logging</H2>
Slurmctld and slurmd daemons can each be configured with different
levels of logging verbosity from 0 (quiet) to 7 (extremely verbose).
Each may also be configured to use debug files. Use fully qualified
pathnames for the files.
<P>
<input type="text" name="slurmctld_logfile" value=""> <B>SlurmctldLogFile</B> (default is none, log goes to syslog)
<P>
<input type="text" name="slurmd_logfile" value=""> <B>SlurmdLogFile</B> (default is none,
log goes to syslog, string "%h" in name gets replaced with hostname)
<P>
<H2>Job Accounting Gather</H2>
SLURM accounts for resource use per job. System-specific information can be
polled, as determined by the system type.<BR>
Select one value for <B>JobAcctGatherType</B>:<BR>
<input type="radio" name="job_acct_gather_type" value="none" checked> <B>None</B>: No
job accounting<BR>
<input type="radio" name="job_acct_gather_type" value="aix"> <B>AIX</B>: Specific
AIX process table information gathered, use with AIX systems only<BR>
<input type="radio" name="job_acct_gather_type" value="linux"> <B>Linux</B>: Specific
Linux process table information gathered, use with Linux systems only<BR>
<H2>Job Accounting Storage</H2>
Used with Job Accounting Gather, SLURM can store the accounting information in many different fashions. Fill in your system's choice here.<BR>
Select one value for <B>AccountingStorageType</B>:<BR>
<input type="radio" name="accounting_storage_type" value="none" checked> <B>None</B>:
No job accounting storage<BR>
<input type="radio" name="accounting_storage_type" value="filetxt"> <B>FileTxt</B>:
Write job accounting to a text file (records limited information)<BR>
<input type="radio" name="accounting_storage_type" value="mysql"> <B>MySQL</B>:
Write job accounting to a MySQL database<BR>
<input type="radio" name="accounting_storage_type" value="pgsql"> <B>PGSQL</B>:
Write job accounting to a PostgreSQL database (not fully supported)<BR>
<input type="radio" name="accounting_storage_type" value="slurmdbd"> <B>SlurmDBD</B>:
Write job accounting to Slurm DBD (database daemon) which can securely
save the data from many Slurm managed clusters into a common database<BR>
<p><b>Options below are for use with a database to specify where the database is running and how to connect to it</b><br>
<input type="text" name="cluster_name" value="cluster"> <B>ClusterName</B>:
Name to be recorded in database for jobs from this cluster.
This is important if a single database is used to record information
from multiple Slurm-managed clusters.<br>
<P>
<H2>Process ID Logging</H2>
Define the location into which we can record the daemon's process ID.
This is used to locate the appropriate daemon for signalling.
Specify the fully qualified pathname for the file.
<P>
<input type="text" name="slurmctld_pid_file" value="/var/run/slurmctld.pid">
<B>SlurmctldPidFile</B>
<P>
<input type="text" name="slurmd_pid_file" value="/var/run/slurmd.pid">
<B>SlurmdPidFile</B>
<P>
<BR>
<BR>
<input type=button value="Submit" onClick="javascript:displayfile()">
<input type=reset value="Reset Form">
<P>
</FORM>
<HR>
<a href="disclaimer.html" target="_blank" class="privacy">Legal Notices</a><br>
Last modified 3 April 2012</P>
</BODY>