blob: 6619d7f8a3a5c873201fdc4af8aa1860ccfa3e03 [file] [log] [blame] [edit]
<!--
Copyright (C) 2005-2007 The Regents of the University of California.
Copyright (C) 2008-2009 Lawrence Livermore National Security.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
Written by Morris Jette <jette1@llnl.gov> and Danny Auble <da@llnl.gov>
This file is part of SLURM, a resource management program.
For details, see <https://computing.llnl.gov/linux/slurm/>.
SLURM is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public License along
with SLURM; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-->
<HTML>
<HEAD><TITLE>SLURM System Configuration Tool</TITLE>
<SCRIPT type="text/javascript">
<!--
// Build one configuration line from a form field.
//   name: configuration keyword to emit
//   form: object exposing a "value" property (e.g. a text input)
// Returns "name=value" when the field holds a non-empty value; otherwise
// returns the commented-out placeholder "#name=".
function get_field(name,form)
{
    var entered = form.value;
    return entered ? name + "=" + entered : "#" + name + "=";
}
// Variant of get_field for optional inline parameters.
//   name: configuration keyword to emit (callers may include a leading space)
//   form: object exposing a "value" property (e.g. a text input)
// Returns "name=value" when the field holds a non-empty value; otherwise
// returns an empty string so that nothing is emitted at all.
function get_field2(name,form)
{
    var entered = form.value;
    if (!entered) {
        return "";
    }
    return name + "=" + entered;
}
// Pick the accounting storage type, reconciled with the gather type.
//   gather:       selected job accounting gather value ("none" disables it)
//   form_storage: array-like list of radio options ("checked" and "value")
// The first checked option decides the result:
//   - storage "none" while gathering is enabled  -> "filetxt"
//   - storage set while gathering is "none"      -> "none"
//   - otherwise                                  -> the option's own value
// Nothing is returned (undefined) when no option is checked.
function get_accounting_storage_type_field(gather, form_storage)
{
    var gathering = (gather != "none");
    for (var idx = 0; idx < form_storage.length; idx++) {
        var option = form_storage[idx];
        if (!option.checked) {
            continue;
        }
        var storing = (option.value != "none");
        if (gathering && !storing) {
            return "filetxt";
        }
        if (!gathering && storing) {
            return "none";
        }
        return option.value;
    }
}
// Build a configuration line from a radio group, ignoring its first option.
//   name: configuration keyword to emit
//   form: array-like list of radio options ("checked" and "value")
// Scanning starts at index 1, so a checked first option is treated the same
// as no selection. Returns "name=value" for the first checked option found,
// or the commented-out placeholder "#name=" otherwise.
function get_radio_field_skipfirst(name,form)
{
    var idx = 1;
    while (idx < form.length) {
        var option = form[idx];
        if (option.checked) {
            return name + "=" + option.value;
        }
        idx++;
    }
    return "#" + name + "=";
}
// Return the value of the first checked option in a radio group.
//   form: array-like list of radio options ("checked" and "value")
// Nothing is returned (undefined) when no option is checked.
function get_radio_value(form)
{
    var idx = 0;
    while (idx < form.length) {
        var option = form[idx];
        if (option.checked) {
            return option.value;
        }
        idx++;
    }
}
// Keep the SelectType radio group in step with a SelectTypeParameters
// choice: clicking a parameter option clicks the select_type option it
// belongs to.
//   form: the clicked element; only its "name" property is inspected
// Names that match none of the known parameter groups are ignored.
function set_select_type(form)
{
    var groupName = form.name;
    var slot = -1;

    if (groupName == "cons_res_params") {
        slot = 0;   // cons_res
    } else if (groupName == "linear_params") {
        slot = 1;   // linear
    } else if (groupName == "bluegene_params") {
        slot = 2;   // bluegene
    }

    if (slot < 0) {
        return;
    }
    document.config.select_type[slot].click();
}
// Produce the SelectTypeParameters line matching the checked select_type.
// Index 0 of document.config.select_type is cons_res, 1 is linear and
// 2 is bluegene. Only cons_res takes its value from the cons_res_params
// radio group; linear and bluegene emit the commented-out placeholder.
// Nothing is returned (undefined) when none of the three is checked.
function get_select_type_params()
{
    var selectTypes = document.config.select_type;

    if (selectTypes[0].checked) {   // cons_res
        return get_radio_field_skipfirst("SelectTypeParameters",
                                         document.config.cons_res_params);
    }
    if (selectTypes[1].checked || selectTypes[2].checked) {   // linear, bluegene
        return "#SelectTypeParameters=";
    }
}
// Produce the TaskPluginParam line matching the checked task_plugin.
// When the checked task_plugin option has the value "affinity", the line
// carries the value selected in the task_plugin_param radio group;
// otherwise the commented-out placeholder is returned.
function get_task_plugin_param()
{
    var plugins = document.config.task_plugin;
    for (var idx = 0; idx < plugins.length; idx++) {
        var plugin = plugins[idx];
        if (plugin.checked && plugin.value == "affinity") {
            return "TaskPluginParam=" +
                   get_radio_value(document.config.task_plugin_param);
        }
    }
    return "#TaskPluginParam=";
}
// Hide the element whose id is 'out_box' by setting its CSS visibility
// to 'hidden'.
function hide_box()
{
    document.getElementById('out_box').style.visibility = 'hidden';
}
// Generate the slurm.conf text from the "config" form and show it.
//
// Every line of the generated file is concatenated into one string, with
// "<br>" as the line separator. Values come from three kinds of sources:
//   - text inputs read directly (document.config.<field>.value),
//   - optional text inputs routed through get_field / get_field2, which
//     emit a commented-out placeholder or nothing when the input is empty,
//   - radio groups routed through get_radio_value and the select-type,
//     task-plugin and accounting-storage helper functions.
// Lines starting with "#" are fixed, commented-out placeholders for
// parameters this tool does not set.
//
// The finished text replaces the current page through
// document.open() / document.write() / document.close().
function displayfile()
{
    var printme = "# slurm.conf file generated by configurator.html.<br>" +
        "# Put this file on all nodes of your cluster.<br>" +
        "# See the slurm.conf man page for more information.<br>" +
        "#<br>" +
        "ControlMachine=" + document.config.control_machine.value + "<br>" +
        get_field("ControlAddr",document.config.control_addr) + "<br>" +
        get_field("BackupController",document.config.backup_controller) + "<br>" +
        get_field("BackupAddr",document.config.backup_addr) + "<br>" +
        "# <br>" +
        "AuthType=auth/" + get_radio_value(document.config.auth_type) + "<br>" +
        "CacheGroups=" + get_radio_value(document.config.cache_groups) + "<br>" +
        "#CheckpointType=checkpoint/none <br>" +
        "CryptoType=crypto/" + get_radio_value(document.config.crypto_type) + "<br>" +
        "#DisableRootJobs=NO <br>" +
        "#EnforcePartLimits=NO <br>" +
        get_field("Epilog",document.config.epilog) + "<br>" +
        "#EpilogSlurmctld= <br>" +
        "#FirstJobId=1 <br>" +
        "#JobCheckpointDir=/var/slurm/checkpoint <br>" +
        get_field("JobCredentialPrivateKey", document.config.private_key) + "<br>" +
        get_field("JobCredentialPublicCertificate", document.config.public_key) + "<br>" +
        "#JobFileAppend=0 <br>" +
        "#JobRequeue=1 <br>" +
        "#KillOnBadExit=0 <br>" +
        "#Licenses=foo*4,bar <br>" +
        "#MailProg=/bin/mail <br>" +
        "#MaxJobCount=5000 <br>" +
        "MpiDefault=" + get_radio_value(document.config.mpi_default) + "<br>" +
        "#MpiParams=ports=#-# <br>" +
        "#PluginDir= <br>" +
        "#PlugStackConfig= <br>" +
        "#PrivateData=jobs <br>" +
        "ProctrackType=proctrack/" + get_radio_value(document.config.proctrack_type) + "<br>" +
        get_field("Prolog",document.config.prolog) + "<br>" +
        "#PrologSlurmctld= <br>" +
        "#PropagatePrioProcess=0 <br>" +
        "#PropagateResourceLimits= <br>" +
        "#PropagateResourceLimitsExcept= <br>" +
        "ReturnToService=" + get_radio_value(document.config.return_to_service) + "<br>" +
        "#SallocDefaultCommand= <br>" +
        "SlurmctldPidFile=" + document.config.slurmctld_pid_file.value + "<br>" +
        "SlurmctldPort=" + document.config.slurmctld_port.value + "<br>" +
        "SlurmdPidFile=" + document.config.slurmd_pid_file.value + "<br>" +
        "SlurmdPort=" + document.config.slurmd_port.value + "<br>" +
        "SlurmdSpoolDir=" + document.config.slurmd_spool_dir.value + "<br>" +
        "SlurmUser=" + document.config.slurm_user.value + "<br>" +
        get_field("SrunEpilog",document.config.srun_epilog) + "<br>" +
        get_field("SrunProlog",document.config.srun_prolog) + "<br>" +
        "StateSaveLocation=" + document.config.state_save_location.value + "<br>" +
        "SwitchType=switch/" + get_radio_value(document.config.switch_type) + "<br>" +
        get_field("TaskEpilog",document.config.task_epilog) + "<br>" +
        "TaskPlugin=task/" + get_radio_value(document.config.task_plugin) + "<br>" +
        get_task_plugin_param() + "<br>" +
        get_field("TaskProlog",document.config.task_prolog) + "<br>" +
        "#TopologyPlugin=topology/tree <br>" +
        "#TmpFs=/tmp <br>" +
        "#TrackWCKey=no <br>" +
        "#TreeWidth= <br>" +
        "#UnkillableStepProgram= <br>" +
        "#UnkillableStepTimeout= <br>" +
        "#UsePAM=0 <br>" +
        "# <br>" +
        "# <br>" +
        "# TIMERS <br>" +
        "#BatchStartTimeout=10 <br>" +
        "#CompleteWait=0 <br>" +
        "#EpilogMsgTime=2000 <br>" +
        "#GetEnvTimeout=2 <br>" +
        "#HealthCheckInterval=0 <br>" +
        "#HealthCheckProgram= <br>" +
        "InactiveLimit=" + document.config.inactive_limit.value + "<br>" +
        "KillWait=" + document.config.kill_wait.value + "<br>" +
        "#MessageTimeout=10 <br>" +
        "#ResvOverRun=0 <br>" +
        "MinJobAge=" + document.config.min_job_age.value + "<br>" +
        "#OverTimeLimit=0 <br>" +
        "SlurmctldTimeout=" + document.config.slurmctld_timeout.value + "<br>" +
        "SlurmdTimeout=" + document.config.slurmd_timeout.value + "<br>" +
        "#UnkillableStepProgram= <br>" +
        "#UnkillableStepTimeout=60 <br>" +
        "Waittime=" + document.config.wait_time.value + "<br>" +
        "# <br>" +
        "# <br>" +
        "# SCHEDULING <br>" +
        "#DefMemPerCPU=0 <br>" +
        "FastSchedule=" + get_radio_value(document.config.fast_schedule) + "<br>" +
        "#MaxMemPerCPU=0 <br>" +
        "#SchedulerRootFilter=1 <br>" +
        "#SchedulerTimeSlice=30 <br>" +
        "SchedulerType=sched/" + get_radio_value(document.config.sched_type) + "<br>" +
        get_field("SchedulerPort",document.config.scheduler_port) + "<br>" +
        "SelectType=select/" + get_radio_value(document.config.select_type) + "<br>" +
        get_select_type_params() + "<br>" +
        "# <br>" +
        "# <br>" +
        "# JOB PRIORITY <br>" +
        "#PriorityType=priority/basic <br>" +
        "#PriorityDecayHalfLife= <br>" +
        "#PriorityFavorSmall= <br>" +
        "#PriorityMaxAge= <br>" +
        "#PriorityUsageResetPeriod= <br>" +
        "#PriorityWeightAge= <br>" +
        "#PriorityWeightFairshare= <br>" +
        "#PriorityWeightJobSize= <br>" +
        "#PriorityWeightPartition= <br>" +
        "#PriorityWeightQOS= <br>" +
        "# <br>" +
        "# <br>" +
        "# LOGGING AND ACCOUNTING <br>" +
        "#AccountingStorageEnforce=0 <br>" +
        get_field("AccountingStorageHost",document.config.accounting_storage_host) + "<br>" +
        get_field("AccountingStorageLoc",document.config.accounting_storage_loc) + "<br>" +
        get_field("AccountingStoragePass",document.config.accounting_storage_pass) + "<br>" +
        get_field("AccountingStoragePort",document.config.accounting_storage_port) + "<br>" +
        "AccountingStorageType=accounting_storage/" +
            get_accounting_storage_type_field(
                get_radio_value(document.config.job_acct_gather_type),
                document.config.accounting_storage_type) + "<br>" +
        get_field("AccountingStorageUser",document.config.accounting_storage_user) + "<br>" +
        get_field("ClusterName",document.config.cluster_name) + "<br>" +
        "#DebugFlags= <br>" +
        get_field("JobCompHost",document.config.job_comp_host) + "<br>" +
        get_field("JobCompLoc",document.config.job_comp_loc) + "<br>" +
        get_field("JobCompPass",document.config.job_comp_pass) + "<br>" +
        get_field("JobCompPort",document.config.job_comp_port) + "<br>" +
        "JobCompType=jobcomp/" + get_radio_value(document.config.job_comp_type) + "<br>" +
        get_field("JobCompUser",document.config.job_comp_user) + "<br>" +
        get_field("JobAcctGatherFrequency",document.config.job_acct_gather_frequency) + "<br>" +
        "JobAcctGatherType=jobacct_gather/" + get_radio_value(document.config.job_acct_gather_type) + "<br>" +
        "SlurmctldDebug=" + document.config.slurmctld_debug.value + "<br>" +
        get_field("SlurmctldLogFile",document.config.slurmctld_logfile) + "<br>" +
        "SlurmdDebug=" + document.config.slurmd_debug.value + "<br>" +
        get_field("SlurmdLogFile",document.config.slurmd_logfile) + "<br>" +
        "# <br>" +
        "# <br>" +
        "# POWER SAVE SUPPORT FOR IDLE NODES (optional) <br>" +
        "#SuspendProgram= <br>" +
        "#ResumeProgram= <br>" +
        "#SuspendTimeout= <br>" +
        "#ResumeTimeout= <br>" +
        "#ResumeRate= <br>" +
        "#SuspendExcNodes= <br>" +
        "#SuspendExcParts= <br>" +
        "#SuspendRate= <br>" +
        "#SuspendTime= <br>" +
        "# <br>" +
        "# <br>" +
        "# COMPUTE NODES <br>" +
        // The node and partition definitions are each a single output line,
        // so their pieces are joined without "<br>" in between.
        "NodeName=" + document.config.node_name.value +
        get_field2(" NodeAddr",document.config.node_addr) +
        get_field2(" Procs",document.config.procs) +
        get_field2(" RealMemory",document.config.memory) +
        get_field2(" Sockets",document.config.sockets) +
        get_field2(" CoresPerSocket",document.config.cores_per_socket) +
        get_field2(" ThreadsPerCore",document.config.threads_per_core) +
        " State=UNKNOWN <br>" +
        "PartitionName=" + document.config.partition_name.value +
        " Nodes=" + document.config.node_name.value +
        " Default=YES"+
        " MaxTime=" + document.config.max_time.value +
        " State=UP";

    // Disabled alternative: show the result in the 'out_box' popup element
    // instead of replacing the page.
    //scroll(0,0);
    //var popup = document.getElementById('out_box');
    //popup.innerHTML = "<a href='javascript:hide_box();'>close</a><br>";
    //popup.innerHTML += "#BEGIN SLURM.CONF FILE<br><br>";
    //popup.innerHTML += printme;
    //popup.innerHTML += "<br><br>#END SLURM.CONF FILE<br>";
    //popup.innerHTML += "<a href='javascript:hide_box();'>close</a>";
    //popup.style.visibility = 'visible';

    // Active behavior: replace the current document with the generated text.
    document.open();
    document.write(printme);
    document.close();
}
-->
</SCRIPT>
<!-- <div style='visibility:hidden;text-align:left;background:#ccc;border:1px solid black;position: absolute;left:100;z-index:1;padding:5;' id='out_box'></div> -->
</HEAD>
<BODY>
<FORM name=config>
<H1>SLURM Version @SLURM_MAJOR@.@SLURM_MINOR@ Configuration Tool</H1>
<P>This form can be used to create a SLURM configuration file with
you controlling many of the important configuration parameters.</P>
<P><B>This tool supports SLURM version @SLURM_MAJOR@.@SLURM_MINOR@ only.</B>
Configuration files for other versions of SLURM should be built
using the tool distributed with it in <i>doc/html/configurator.html</i>.
Some parameters will be set to default values, but you can
manually edit the resulting <I>slurm.conf</I> as desired
for greater flexibility. See <I>man slurm.conf</I> for more
details about the configuration parameters.</P>
<P>Note that while SLURM daemons create log files and other files as needed,
it treats the lack of parent directories as a fatal error.
This prevents the daemons from running if critical file systems are
not mounted and will minimize the risk of cold-starting (starting
without preserving jobs).</P>
<P>Note that this configuration file must be installed on all nodes
in your cluster.</P>
<P>After you have filled in the fields of interest, use the
"Submit" button on the bottom of the page to build the <I>slurm.conf</I>
file. It will appear on your web browser. Save the file in text format
as <I>slurm.conf</I> for use by SLURM.
<P>For more information about SLURM, see
<A HREF="https://computing.llnl.gov/linux/slurm/">https://computing.llnl.gov/linux/slurm/</A>
<P>
<A HREF="https://www.llnl.gov/disclaimer.html"><B>Privacy and legal notice</B></A>
<H2>Control Machines</H2>
Define the hostname of the computer on which the SLURM controller and
optional backup controller will execute. You can also specify addresses
of these computers if desired (defaults to their hostnames).
The IP addresses can be either numeric IP addresses or names.
Hostname values should not be the fully qualified domain
name (e.g. use <I>linux</I> rather than <I>linux.llnl.gov</I>).
<P>
<input type="text" name="control_machine" value="linux0"> <B>ControlMachine</B>:
Master Controller Hostname
<P>
<input type="text" name="control_addr"> <B>ControlAddr</B>: Master Controller
Address (optional)
<P>
<input type="text" name="backup_controller"> <B>BackupController</B>: Backup
Controller Hostname (optional)
<P>
<input type="text" name="backup_addr"> <B>BackupAddr</B>: Backup Controller
Address (optional)
<P>
<H2>Compute Machines</H2>
Define the machines on which user applications can run.
You can also specify addresses of these computers if desired
(defaults to their hostnames).
Only a few of the possible parameters associated with the nodes will
be set by this tool, but many others are available.
All of the nodes will be placed into a single partition (or queue)
with global access. Many options are available to group nodes into
partitions with a wide variety of configuration parameters.
Manually edit the <i>slurm.conf</i> produced to exercise these options.
Node names and addresses may be specified using a numeric range specification.
<P>
<input type="text" name="node_name" value="linux[1-32]"> <B>NodeName</B>:
Compute nodes
<P>
<input type="text" name="node_addr"> <B>NodeAddr</B>: Compute node addresses
(optional)
<P>
<input type="text" name="partition_name" value="debug"> <B>PartitionName</B>:
Name of the one partition to be created
<P>
<input type="text" name="max_time" value="INFINITE"> <B>MaxTime</B>:
Maximum time limit of jobs in minutes or INFINITE
<P>
The following parameters describe a node's configuration.
Set a value for <B>Procs</B>.
The other parameters are optional, but provide more control over scheduled resources:
<P>
<input type="text" name="procs" value="1"> <B>Procs</B>: Count of processors
on each compute node.
If Procs is omitted, it will be inferred from:
Sockets, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="sockets" value="">
<B>Sockets</B>:
Number of physical processor sockets/chips on the node.
If Sockets is omitted, it will be inferred from:
Procs, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="cores_per_socket" value="">
<B>CoresPerSocket</B>:
Number of cores in a single physical processor socket.
The CoresPerSocket value describes physical cores, not
the logical number of processors per socket.
<P>
<input type="text" name="threads_per_core" value="">
<B>ThreadsPerCore</B>:
Number of logical threads in a single physical core.
<P>
<input type="text" name="memory" value=""> <B>RealMemory</B>: Amount
of real memory. This parameter is required when specifying Memory as a
consumable resource with the select/cons_res plug-in. See below
under Resource Selection.
<P>
<H2>SLURM User</H2>
The SLURM controller (slurmctld) can run without elevated privileges,
so it is recommended that a user "slurm" be created for it. For testing
purposes any user name can be used.
<P>
<input type="text" name="slurm_user" value="slurm"> <B>SlurmUser</B>
<P>
<H2>Group ID Caching</H2>
If you have a slow NIS environment, big parallel jobs take a long time
to start up (and may eventually time-out) because the NIS server(s)
may not be able to quickly respond to simultaneous requests from
multiple slurmd's. You can instruct slurmd to cache /etc/groups
entries to prevent this from happening by setting
<B>CacheGroups</B>=1. Reconfiguring ("scontrol reconfig") with
<B>CacheGroups</B>=0 will cause slurmd to purge the cache. Select one
value for <B>CacheGroups</B>:<BR>
<input type="radio" name="cache_groups" value="0" checked>
<B>0</B>: for normal environment.<BR>
<input type="radio" name="cache_groups" value="1">
<B>1</B>: for slow NIS environment.
<P>
WARNING: The group ID cache does not try to keep itself in sync with
the system. You MUST run "scontrol reconfig" to update the cache
after making any changes to system password or group databases.
<P>
<H2>SLURM Port Numbers</H2>
The SLURM controller (slurmctld) requires a unique port for communications
as do the SLURM compute node daemons (slurmd). If not set, slurm ports
are set by checking for an entry in <I>/etc/services</I> and if that
fails by using an internal default set at SLURM build time.
<P>
<input type="text" name="slurmctld_port" value="6817"> <B>SlurmctldPort</B>
<P>
<input type="text" name="slurmd_port" value="6818"> <B>SlurmdPort</B>
<P>
<H2>Authentication and Security</H2>
Define the method used for authenticating communications between SLURM components.<BR>
Select one value for <B>AuthType</B>:<BR>
<input type="radio" name="auth_type" value="none"> <B>None</B>: No authentication,
not recommended for production use<br>
<input type="radio" name="auth_type" value="authd"> <B>Authd</B>: Brent Chun's
<A href="http://www.theether.org/authd/">authd</A><BR>
<input type="radio" name="auth_type" value="munge" checked> <B>Munge</B>: LLNL's
<A href="http://home.gna.org/munge/">Munge</A><BR>
<P>
Library used for job step cryptographic signature generation.<BR>
Select one value for <B>CryptoType</B>:<BR>
<input type="radio" name="crypto_type" value="munge" checked><B>Munge</B>: LLNL's
<A href="http://home.gna.org/munge/">Munge</A> (has Gnu Public License)<BR>
<input type="radio" name="crypto_type" value="openssl"> <B>OpenSSL</B>:
<A href="http://www.openssl.org/">OpenSSL</A>
<P>
Define the location of public and private keys used by SLURM's
cryptographic signature generation plugin (CryptoType).<br>
<b>These values are only used if CryptoType=OpenSSL.</b><br>
These files need to be generated by the SLURM administrator.
Specify fully qualified pathnames.
<P>
<input type="text" name="private_key"> <B>JobCredentialPrivateKey</B>
<P>
<input type="text" name="public_key"> <B>JobCredentialPublicCertificate</B>
<P>
<H2>State Preservation</H2>
Define the location of a directory where the slurmctld daemon saves its state.
This should be a fully qualified pathname which can be read and written to
by the SLURM user on both the control machine and backup controller (if configured).
The location of a directory where slurmd saves state should also be defined.
This must be a unique directory on each compute server (local disk).
The use of a highly reliable file system (e.g. RAID) is recommended.
<P>
<input type="text" name="state_save_location" value="/tmp"> <B>StateSaveLocation</B>:
Slurmctld state save directory <B>Must be writable by both ControlMachine and BackupController</B>
<P>
<input type="text" name="slurmd_spool_dir" value="/tmp/slurmd"> <B>SlurmdSpoolDir</B>:
Slurmd state save directory
<P>
Define when a non-responding (DOWN) node is returned to service.<BR>
Select one value for <B>ReturnToService</B>:<BR>
<input type="radio" name="return_to_service" value="0">
<B>0</B>: When explicitly restored to service by an administrator.<BR>
<input type="radio" name="return_to_service" value="1" checked>
<B>1</B>: Automatically, when slurmd daemon registers with valid configuration<BR>
<P>
<H2>Scheduling</H2>
Define the mechanism to be used for controlling job ordering.<BR>
Select one value for <B>SchedulerType</B>:<BR>
<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In
First-Out (FIFO)<BR>
<input type="radio" name="sched_type" value="backfill" checked> <B>Backfill</B>:
FIFO with backfill<BR>
<input type="radio" name="sched_type" value="gang"> <B>Gang</B>: Gang scheduling
(time-slicing for parallel jobs)<BR>
<input type="radio" name="sched_type" value="wiki"> <B>Wiki</B>: Wiki interface
to Maui (configuration parameter <B>SchedulerPort</B> must be specified)<BR>
<input type="radio" name="sched_type" value="wiki2"> <B>Wiki2</B>: Wiki interface
to Moab (configuration parameter <B>SchedulerPort</B> must be specified)<BR>
<P>
<input type="text" name="scheduler_port" value="7321"> <B>SchedulerPort</B>: scheduler
communications port (used by Wiki only)
<P>
Define what node configuration (sockets, cores, memory, etc.) should be used.
Using values defined in the configuration file will provide faster scheduling.<BR>
Select one value for <B>FastSchedule</B>:<BR>
<input type="radio" name="fast_schedule" value="1" checked>
<B>1</B>: Use node configuration values defined in configuration file<BR>
<input type="radio" name="fast_schedule" value="0">
<B>0</B>: Use node configuration values actually found on each node
(if configured with gang scheduling or allocation of individual
processors to jobs rather than only whole node allocations, the processor
count on the node should match the configured value to avoid having extra
processors left idle)
<P>
<H2>Interconnect</H2>
Define the node interconnect used.<BR>
Select one value for <B>SwitchType</B>:<BR>
<input type="radio" name="switch_type" value="elan"> <B>Elan</B>: Quadrics Elan3 or Elan4<BR>
<input type="radio" name="switch_type" value="federation"> <B>Federation</B>: IBM
Federation Switch<BR>
<input type="radio" name="switch_type" value="none" checked> <B>None</B>: No special
handling required (InfiniBand, Myrinet, Ethernet, etc.)<BR>
<P>
<H2>Default MPI Type</H2>
Specify the type of MPI to be used by default. SLURM will configure environment
variables accordingly. Users can over-ride this specification with an srun option.<BR>
Select one value for <B>MpiDefault</B>:<BR>
<input type="radio" name="mpi_default" value="mpichgm"> <B>MPICH-GM</B><BR>
<input type="radio" name="mpi_default" value="mpichmx"> <B>MPICH-MX</B><BR>
<input type="radio" name="mpi_default" value="mpich1_p4"> <B>MPICH1-P4</B><BR>
<input type="radio" name="mpi_default" value="mpich1_shmem"> <B>MPICH1-SHMEM</B>:
This also works for MVAPICH-SHMEM.<BR>
<input type="radio" name="mpi_default" value="mvapich"> <B>MVAPICH</B><BR>
<input type="radio" name="mpi_default" value="none" checked> <B>None</B>:
This works for most other MPI types including MPICH2, LAM MPI and Open MPI.<BR>
<P>
<H2>Process Tracking</H2>
Define the algorithm used to identify which processes are associated with a
given job. This is used to signal, kill, and account for the processes associated
with a job step.<BR>
Select one value for <B>ProctrackType</B>:<BR>
<input type="radio" name="proctrack_type" value="aix"> <B>AIX</B>: Use AIX kernel
extension, recommended for AIX systems<BR>
<input type="radio" name="proctrack_type" value="pgid" checked> <B>Pgid</B>: Use Unix
Process Group ID, processes changing their process group ID can escape from SLURM
control<BR>
<input type="radio" name="proctrack_type" value="linuxproc"> <B>LinuxProc</B>: Use
parent process ID records, required for MPICH-GM use, processes can escape
from SLURM control<BR>
<input type="radio" name="proctrack_type" value="rms"> <B>RMS</B>: Use Quadrics
kernel infrastructure, recommended for systems where this is available<BR>
<input type="radio" name="proctrack_type" value="sgi_job"> <B>SGI's PAGG
module</B>: Use <A HREF="http://oss.sgi.com/projects/pagg/">SGI's Process
Aggregates (PAGG) kernel module</A>, recommended where available<BR>
<P>
<H2>Resource Selection</H2>
Define resource (node) selection algorithm to be used.<BR>
Select one value for <B>SelectType</B>:<BR>
<input type="radio" name="select_type" value="cons_res">
<B>Cons_res</B>: Allocate individual processors and memory<BR>
<DL>
<DL>
<DT><B>SelectTypeParameters</B> (As used by <I>SelectType=Cons_res</I> only):
<DD> Note: The -E extension for sockets, cores, and threads
are ignored within the node allocation mechanism
when CR_CPU or CR_CPU_MEMORY is selected.
They are considered to compute the total number of
tasks when -n is not specified
<DD> Note: CR_MEMORY assumes MaxShare value of one or higher
<DT> <input type="radio" name="cons_res_params" value="CR_CPU" checked
onClick="javascript:set_select_type(this, 'cons_res')">
<B>CR_CPU</B>: (default)
CPUs as consumable resources.
<DD> No notion of sockets, cores, or threads.
On a multi-core system, cores will be considered CPUs.
On a multi-core/hyperthread system, threads will be considered CPUs.
On single-core systems CPUs are CPUs. ;-)
<DT> <input type="radio" name="cons_res_params" value="CR_Socket"
onClick="javascript:set_select_type(this)">
<B>CR_Socket</B>: Sockets as a consumable resource.
<DT> <input type="radio" name="cons_res_params" value="CR_Core"
onClick="javascript:set_select_type(this)">
<B>CR_Core</B>: Cores as a consumable resource.
<DT> <input type="radio" name="cons_res_params" value="CR_Memory"
onClick="javascript:set_select_type(this)">
<B>CR_Memory</B>: Memory as a consumable resource.
<DD> Note: CR_Memory assumes MaxShare value of one or higher
<DT> <input type="radio" name="cons_res_params" value="CR_CPU_Memory"
onClick="javascript:set_select_type(this)">
<B>CR_CPU_Memory</B>:
CPU and Memory as consumable resources.
<DT> <input type="radio" name="cons_res_params" value="CR_Socket_Memory"
onClick="javascript:set_select_type(this)">
<B>CR_Socket_Memory</B>:
Socket and Memory as consumable resources.
<DT> <input type="radio" name="cons_res_params" value="CR_Core_Memory"
onClick="javascript:set_select_type(this)">
<B>CR_Core_Memory</B>:
Core and Memory as consumable resources.
</DL>
</DL>
<input type="radio" name="select_type" value="linear" checked>
<B>Linear</B>: Node-based
resource allocation, does not manage individual processor allocation<BR>
<input type="radio" name="select_type" value="bluegene">
<B>BlueGene</B>: For IBM Blue Gene systems only<BR>
<P>
<H2>Task Launch</H2>
Define a task launch plugin. This may be used to
provide resource management within a node (e.g. pinning
tasks to specific processors).
Select one value for <B>TaskPlugin</B>:<BR>
<input type="radio" name="task_plugin" value="none" checked> <B>None</B>: No task launch actions<BR>
<input type="radio" name="task_plugin" value="affinity"> <B>Affinity</B>:
CPU affinity support
(see srun man pages for the --cpu_bind, --mem_bind, and -E options)
<DL><DL>
<DT><B>TaskPluginParam</B> (As used by <I>TaskPlugin=Affinity</I> only):
<DT><input type="radio" name="task_plugin_param" value="Cpusets">
<B>Cpusets</B>: Use <I>cpusets</I> to control task binding.
<DT><input type="radio" name="task_plugin_param" value="Sched" checked>
<B>Sched</B>: Use <I>sched_setaffinity</I> or <I>plpa_sched_setaffinity</I>
(if available) to bind tasks to processors. This is the default mode of
operation.
</DL></DL>
<P>
<H2>Prolog and Epilog</H2>
<P>
<B>Prolog/Epilog</B>: Fully qualified path that will be executed as
root on every node of a user's job before the job's tasks
will be initiated there and after that job has terminated.
These parameters are optional.
<DL>
<DT> <input type="text" name="prolog" value="" size=40> <B>Prolog</B>
<DT> <input type="text" name="epilog" value="" size=40> <B>Epilog</B>
</DL>
<P>
<B>SrunProlog/Epilog</B>: Fully qualified path to be executed by srun at
job step initiation and termination. These parameters may be overridden by
srun's --prolog and --epilog options
These parameters are optional.
<DL>
<DT> <input type="text" name="srun_prolog" value="" size=40> <B>SrunProlog</B>
<DT> <input type="text" name="srun_epilog" value="" size=40> <B>SrunEpilog</B>
</DL>
<P>
<B>TaskProlog/Epilog</B>: Fully qualified path to be executed as the user
before each task begins execution and after each task terminates.
These parameters are optional.
<DL>
<DT> <input type="text" name="task_prolog" value="" size=40> <B>TaskProlog</B>
<DT> <input type="text" name="task_epilog" value="" size=40> <B>TaskEpilog</B>
</DL>
<H2>Event Logging</H2>
Slurmctld and slurmd daemons can each be configured with different
levels of logging verbosity from 0 (quiet) to 7 (extremely verbose).
Each may also be configured to use debug files. Use fully qualified
pathnames for the files.
<P>
<input type="text" name="slurmctld_debug" value="3"> <B>SlurmctldDebug</B> (0 to 7)
<P>
<input type="text" name="slurmctld_logfile" value=""> <B>SlurmctldLogFile</B> (default is none, log goes to syslog)
<P>
<input type="text" name="slurmd_debug" value="3"> <B>SlurmdDebug</B> (0 to 7)
<P>
<input type="text" name="slurmd_logfile" value=""> <B>SlurmdLogFile</B> (default is none,
log goes to syslog, string "%h" in name gets replaced with hostname)
<P>
<H2>Job Completion Logging</H2>
Define the job completion logging mechanism to be used.<BR>
Select one value for <B>JobCompType</B>:<BR>
<input type="radio" name="job_comp_type" value="none" checked> <B>None</B>:
No job completion logging<BR>
<input type="radio" name="job_comp_type" value="filetxt"> <B>FileTxt</B>:
Write job completion status to a text file<BR>
<input type="radio" name="job_comp_type" value="script"> <B>Script</B>:
Use an arbitrary script to log job completion<BR>
<input type="radio" name="job_comp_type" value="mysql"> <B>MySQL</B>:
Write completion status to a MySQL database<BR>
<input type="radio" name="job_comp_type" value="pgsql"> <B>PGSQL</B>:
Write completion status to a PostgreSQL database<BR>
<input type="radio" name="job_comp_type" value="slurmdbd"> <B>SlurmDBD</B>:
Write completion status to a Slurm database daemon (serving multiple Slurm clusters)
which will write to some database<BR>
<P>
<input type="text" name="job_comp_loc" value=""> <B>JobCompLoc</B>:
This is the location of the text file to be written to (if JobCompType=filetxt)
or the script to be run (if JobCompType=script) or database name (for other values
of JobCompType).
<p><b>Options below are for use with a database to specify where the database is running and how to connect to it</b><br>
<input type="text" name="job_comp_host" value=""> <B>JobCompHost</B>:
Host the database is running on for Job completion<br>
<input type="text" name="job_comp_port" value=""> <B>JobCompPort</B>:
Port the database server is listening on for Job completion<br>
<input type="text" name="job_comp_user" value=""> <B>JobCompUser</B>:
User we are to use to talk to the database for Job completion<br>
<input type="text" name="job_comp_pass" value=""> <B>JobCompPass</B>:
Password we are to use to talk to the database for Job completion<br>
<P>
<H2>Job Accounting Gather</H2>
SLURM accounts for resource use per job. System specifics can be polled
determined by system type<BR>
Select one value for <B>JobAcctGatherType</B>:<BR>
<input type="radio" name="job_acct_gather_type" value="none" checked> <B>None</B>: No
job accounting<BR>
<input type="radio" name="job_acct_gather_type" value="aix"> <B>AIX</B>: Specific
AIX process table information gathered, use with AIX systems only<BR>
<input type="radio" name="job_acct_gather_type" value="linux"> <B>Linux</B>: Specific
Linux process table information gathered, use with Linux systems only<BR>
<input type="text" name="job_acct_gather_frequency" value="30"> <B>JobAcctGatherFrequency</B>:
polling interval in seconds. Zero disables periodic sampling.<BR>
<P>
<H2>Job Accounting Storage</H2>
Used with the Job Accounting Gather SLURM can store the accounting information in many different fashions. Fill in your systems choice here<BR>
Select one value for <B>AccountingStorageType</B>:<BR>
<input type="radio" name="accounting_storage_type" value="none" checked> <B>None</B>:
No job accounting storage<BR>
<input type="radio" name="accounting_storage_type" value="filetxt"> <B>FileTxt</B>:
Write job accounting to a text file<BR>
<input type="radio" name="accounting_storage_type" value="gold"> <B>Gold</B>:
Write completion status to Gold database daemon which can securely
save the data from many Slurm managed clusters into a common database<BR>
<input type="radio" name="accounting_storage_type" value="mysql"> <B>MySQL</B>:
Write job accounting to a MySQL database<BR>
<input type="radio" name="accounting_storage_type" value="pgsql"> <B>PGSQL</B>:
Write job accounting to a PostgreSQL database<BR>
<input type="radio" name="accounting_storage_type" value="slurmdbd"> <B>SlurmDBD</B>:
Write job accounting to Slurm DBD (database daemon) which can securely
save the data from many Slurm managed clusters into a common database<BR>
<input type="text" name="accounting_storage_loc" value=""> <B>AccountingStorageLoc</B>:
Location specification or database name.
This is the location of the text file to be written to (used by Log only).
Use a fully qualified pathname. If using a database it is the name of the database you will use or create for the stored data.<br>
<p><b>Options below are for use with a database to specify where the database is running and how to connect to it</b><br>
<input type="text" name="accounting_storage_host" value=""> <B>AccountingStorageHost</B>:
Host the database is running on for Job Accounting<br>
<input type="text" name="accounting_storage_port" value=""> <B>AccountingStoragePort</B>:
Port the database server is listening on for Job Accounting<br>
<input type="text" name="accounting_storage_user" value=""> <B>AccountingStorageUser</B>:
User we are to use to talk to the database for Job Accounting<br>
<input type="text" name="accounting_storage_pass" value=""> <B>AccountingStoragePass</B>:
Password we are to use to talk to the database for Job Accounting.
In the case of SlurmDBD, this will be an alternate socket name for use with a Munge
daemon providing enterprise-wide authentication (while the default Munge socket
would provide cluster-wide authentication only).<br>
<input type="text" name="cluster_name" value="cluster"> <B>ClusterName</B>:
Name to be recorded in database for jobs from this cluster.
This is important if a single database is used to record information
from multiple Slurm-managed clusters.<br>
<P>
<H2>Process ID Logging</H2>
Define the location into which we can record the daemon's process ID.
This is used to locate the appropriate daemon for signalling.
Specify the fully qualified pathname for the file.
<P>
<input type="text" name="slurmctld_pid_file" value="/var/run/slurmctld.pid">
<B>SlurmctldPidFile</B>
<P>
<input type="text" name="slurmd_pid_file" value="/var/run/slurmd.pid">
<B>SlurmdPidFile</B>
<P>
<H2>Timers</H2>
SLURM has a variety of timers to control when to consider a node DOWN,
when to purge job records, how long to give a job to gracefully terminate, etc.
<P>
<input type="text" name="slurmctld_timeout" value="300">
<B>SlurmctldTimeout</B>: How many seconds the backup controller waits before
becoming the master controller
<P>
<input type="text" name="slurmd_timeout" value="300">
<B>SlurmdTimeout</B>: How many seconds the SLURM controller waits for the slurmd
to respond to a request before considering the node DOWN
<P>
<input type="text" name="inactive_limit" value="0">
<B>InactiveLimit</B>: How many seconds the SLURM controller waits for srun
commands to respond before considering the job or job step inactive and
terminating it. A value of zero indicates unlimited wait
<P>
<input type="text" name="min_job_age" value="300">
<B>MinJobAge</B>: How many seconds the SLURM controller waits after a
job terminates before purging its record. A record of the job will
persist in job completion and/or accounting records indefinitely,
but will no longer be visible with the squeue command after purging
<P>
<input type="text" name="kill_wait" value="30">
<B>KillWait</B>: How many seconds a job is given to gracefully terminate
after reaching its time limit and being sent SIGTERM before sending
a SIGKILL
<P>
<input type="text" name="wait_time" value="0">
<B>WaitTime</B>: How many seconds after a job step's first task terminates
before terminating all remaining tasks. A value of zero indicates unlimited wait
<P>
<BR>
<BR>
<input type=button value="Submit" onClick="javascript:displayfile()">
<input type=reset value="Reset Form">
<P>
</FORM>
<HR>
<P class="footer">LLNL-WEB-402631<BR>
Last modified 13 May 2009</P>
</BODY>