<!--
Copyright (C) 2005-2007 The Regents of the University of California.
Copyright (C) 2008-2011 Lawrence Livermore National Security.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
Written by Morris Jette <jette1@llnl.gov> and Danny Auble <da@llnl.gov>

This file is part of Slurm, a resource management program.
For details, see <https://slurm.schedmd.com/>.

Slurm is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along
with Slurm; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-->
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>Slurm System Configuration Tool</title>
<SCRIPT type="text/javascript">
<!--
// Return "name=value" when a value is given, otherwise a commented-out
// "#name=" placeholder line.
function print_pair(name,value)
{
	if (value)
		return name + "=" + value
	return "#" + name + "="
}

// Return "name=value" from a text input, otherwise a commented-out
// "#name=" placeholder line.
function get_field(name,form)
{
	if (form.value)
		return name + "=" + form.value
	return "#" + name + "="
}

// Return "name=value" from a text input, otherwise an empty string
// (used for optional per-node parameters).
function get_field2(name,form)
{
	if (form.value)
		return name + "=" + form.value
	return ""
}

// Return "name=value" from a radio group, ignoring the first button;
// otherwise a commented-out "#name=" placeholder line.
function get_radio_field_skipfirst(name,form)
{
	for (var i=1; i < form.length; i++)
	{
		if (form[i].checked)
		{
			return name + "=" + form[i].value
		}
	}
	return "#" + name + "="
}

// Return the value of the selected button in a radio group.
function get_radio_value(form)
{
	for (var i=0; i < form.length; i++)
	{
		if (form[i].checked)
		{
			return form[i].value
		}
	}
}

// Return a comma-separated list of the values of all checked boxes.
function get_checkbox_value(form)
{
	var comma_list = ""
	for (var i=0; i < form.length; i++)
	{
		if (form[i].checked)
		{
			if (comma_list != "")
				comma_list = comma_list + ","
			comma_list = comma_list + form[i].value
		}
	}
	return comma_list
}

// Hide the (currently unused) popup output box.
function hide_box()
{
	var popup = document.getElementById('out_box');
	popup.style.visibility = 'hidden';
}

// Build the slurm.conf text from the form fields and write it out,
// replacing the contents of the current page.
function displayfile()
{
	var printme = "# slurm.conf file generated by configurator.html.<br>" +
		"# Put this file on all nodes of your cluster.<br>" +
		"# See the slurm.conf man page for more information.<br>" +
		"#<br>" +
		get_field("ClusterName",document.config.cluster_name) + "<br>" +
		"SlurmctldHost=" + document.config.control_machine.value + "<br>" +
		get_field("SlurmctldHost",document.config.backup_controller) + "<br>" +
		"# <br>" +
		"#DisableRootJobs=NO <br>" +
		"#EnforcePartLimits=NO <br>" +
		get_field("Epilog",document.config.epilog) + "<br>" +
		"#EpilogSlurmctld= <br>" +
		"#FirstJobId=1 <br>" +
		"#MaxJobId=67043328 <br>" +
		"#GresTypes= <br>" +
		"#GroupUpdateForce=0 <br>" +
		"#GroupUpdateTime=600 <br>" +
		"#JobFileAppend=0 <br>" +
		"#JobRequeue=1 <br>" +
		"#JobSubmitPlugins=lua <br>" +
		"#KillOnBadExit=0 <br>" +
		"#LaunchType=launch/slurm <br>" +
		"#Licenses=foo*4,bar <br>" +
		"#MailProg=/bin/mail <br>" +
		"#MaxJobCount=10000 <br>" +
		"#MaxStepCount=40000 <br>" +
		"#MaxTasksPerNode=512 <br>" +
		print_pair("MpiDefault",get_radio_value(document.config.mpi_default)) + "<br>" +
		"#MpiParams=ports=#-# <br>" +
		"#PluginDir= <br>" +
		"#PlugStackConfig= <br>" +
		"#PrivateData=jobs <br>" +
		"ProctrackType=proctrack/" + get_radio_value(document.config.proctrack_type) + "<br>" +
		get_field("Prolog",document.config.prolog) + "<br>" +
		"#PrologFlags= <br>" +
		"#PrologSlurmctld= <br>" +
		"#PropagatePrioProcess=0 <br>" +
		"#PropagateResourceLimits= <br>" +
		"#PropagateResourceLimitsExcept= <br>" +
		"#RebootProgram= <br>" +
		"ReturnToService=" + get_radio_value(document.config.return_to_service) + "<br>" +
		"SlurmctldPidFile=" + document.config.slurmctld_pid_file.value + "<br>" +
		"SlurmctldPort=" + document.config.slurmctld_port.value + "<br>" +
		"SlurmdPidFile=" + document.config.slurmd_pid_file.value + "<br>" +
		"SlurmdPort=" + document.config.slurmd_port.value + "<br>" +
		"SlurmdSpoolDir=" + document.config.slurmd_spool_dir.value + "<br>" +
		"SlurmUser=" + document.config.slurm_user.value + "<br>" +
		"#SlurmdUser=root <br>" +
		get_field("SrunEpilog",document.config.srun_epilog) + "<br>" +
		get_field("SrunProlog",document.config.srun_prolog) + "<br>" +
		"StateSaveLocation=" + document.config.state_save_location.value + "<br>" +
		print_pair("SwitchType",get_radio_value(document.config.switch_type)) + "<br>" +
		get_field("TaskEpilog",document.config.task_epilog) + "<br>" +
		"TaskPlugin=" + get_checkbox_value(document.config.task_plugin) + "<br>" +
		get_field("TaskProlog",document.config.task_prolog) + "<br>" +
		"#TopologyPlugin=topology/tree <br>" +
		"#TmpFS=/tmp <br>" +
		"#TrackWCKey=no <br>" +
		"#TreeWidth= <br>" +
		"#UnkillableStepProgram= <br>" +
		"#UsePAM=0 <br>" +
		"# <br>" +
		"# <br>" +
		"# TIMERS <br>" +
		"#BatchStartTimeout=10 <br>" +
		"#CompleteWait=0 <br>" +
		"#EpilogMsgTime=2000 <br>" +
		"#GetEnvTimeout=2 <br>" +
		"#HealthCheckInterval=0 <br>" +
		"#HealthCheckProgram= <br>" +
		"InactiveLimit=" + document.config.inactive_limit.value + "<br>" +
		"KillWait=" + document.config.kill_wait.value + "<br>" +
		"#MessageTimeout=10 <br>" +
		"#ResvOverRun=0 <br>" +
		"MinJobAge=" + document.config.min_job_age.value + "<br>" +
		"#OverTimeLimit=0 <br>" +
		"SlurmctldTimeout=" + document.config.slurmctld_timeout.value + "<br>" +
		"SlurmdTimeout=" + document.config.slurmd_timeout.value + "<br>" +
		"#UnkillableStepTimeout=60 <br>" +
		"#VSizeFactor=0 <br>" +
		"Waittime=" + document.config.wait_time.value + "<br>" +
		"# <br>" +
		"# <br>" +
		"# SCHEDULING <br>" +
		"#DefMemPerCPU=0 <br>" +
		"#MaxMemPerCPU=0 <br>" +
		"#SchedulerTimeSlice=30 <br>" +
		"SchedulerType=sched/" + get_radio_value(document.config.sched_type) + "<br>" +
		"SelectType=select/" + get_radio_value(document.config.select_type) + "<br>" +
		"# <br>" +
		"# <br>" +
		"# JOB PRIORITY <br>" +
		"#PriorityFlags= <br>" +
		"#PriorityType=priority/multifactor <br>" +
		"#PriorityDecayHalfLife= <br>" +
		"#PriorityCalcPeriod= <br>" +
		"#PriorityFavorSmall= <br>" +
		"#PriorityMaxAge= <br>" +
		"#PriorityUsageResetPeriod= <br>" +
		"#PriorityWeightAge= <br>" +
		"#PriorityWeightFairshare= <br>" +
		"#PriorityWeightJobSize= <br>" +
		"#PriorityWeightPartition= <br>" +
		"#PriorityWeightQOS= <br>" +
		"# <br>" +
		"# <br>" +
		"# LOGGING AND ACCOUNTING <br>" +
		"#AccountingStorageEnforce=0 <br>" +
		get_field("AccountingStorageHost",document.config.accounting_storage_host) + "<br>" +
		get_field("AccountingStoragePort",document.config.accounting_storage_port) + "<br>" +
		print_pair("AccountingStorageType",get_radio_value(document.config.accounting_storage_type)) + "<br>" +
		get_field("AccountingStoreFlags",document.config.acctng_store_flags) + "<br>" +
		get_field("JobCompHost",document.config.job_comp_host) + "<br>" +
		get_field("JobCompLoc",document.config.job_comp_loc) + "<br>" +
		get_field("JobCompParams",document.config.job_comp_params) + "<br>" +
		get_field("JobCompPass",document.config.job_comp_pass) + "<br>" +
		get_field("JobCompPort",document.config.job_comp_port) + "<br>" +
		"JobCompType=jobcomp/" + get_radio_value(document.config.job_comp_type) + "<br>" +
		get_field("JobCompUser",document.config.job_comp_user) + "<br>" +
		"#JobContainerType= <br>" +
		get_field("JobAcctGatherFrequency",document.config.job_acct_gather_frequency) + "<br>" +
		print_pair("JobAcctGatherType",get_radio_value(document.config.job_acct_gather_type)) + "<br>" +
		"SlurmctldDebug=" + document.config.slurmctld_debug.value + "<br>" +
		get_field("SlurmctldLogFile",document.config.slurmctld_logfile) + "<br>" +
		"SlurmdDebug=" + document.config.slurmd_debug.value + "<br>" +
		get_field("SlurmdLogFile",document.config.slurmd_logfile) + "<br>" +
		"#SlurmSchedLogFile= <br>" +
		"#SlurmSchedLogLevel= <br>" +
		"#DebugFlags= <br>" +
		"# <br>" +
		"# <br>" +
		"# POWER SAVE SUPPORT FOR IDLE NODES (optional) <br>" +
		"#SuspendProgram= <br>" +
		"#ResumeProgram= <br>" +
		"#SuspendTimeout= <br>" +
		"#ResumeTimeout= <br>" +
		"#ResumeRate= <br>" +
		"#SuspendExcNodes= <br>" +
		"#SuspendExcParts= <br>" +
		"#SuspendRate= <br>" +
		"#SuspendTime= <br>" +
		"# <br>" +
		"# <br>" +
		"# COMPUTE NODES <br>" +
		"NodeName=" + document.config.node_name.value +
		get_field2(" NodeAddr",document.config.node_addr) +
		get_field2(" CPUs",document.config.procs) +
		get_field2(" RealMemory",document.config.memory) +
		get_field2(" Sockets",document.config.sockets) +
		get_field2(" CoresPerSocket",document.config.cores_per_socket) +
		get_field2(" ThreadsPerCore",document.config.threads_per_core) +
		" State=UNKNOWN <br>" +
		"PartitionName=" + document.config.partition_name.value +
		" Nodes=ALL" +
		" Default=YES" +
		" MaxTime=" + document.config.max_time.value +
		" State=UP"

	//scroll(0,0);
	//var popup = document.getElementById('out_box');

	// Alternative output method (disabled): show the file in a popup box.
	//popup.innerHTML = "<a href='javascript:hide_box();'>close</a><br>";
	//popup.innerHTML += "#BEGIN SLURM.CONF FILE<br><br>";
	//popup.innerHTML += printme;
	//popup.innerHTML += "<br><br>#END SLURM.CONF FILE<br>";
	//popup.innerHTML += "<a href='javascript:hide_box();'>close</a>";

	//popup.style.visibility = 'visible';

	// Replace the current page with the generated file.
	document.open();
	document.write(printme);
	document.close();
}

-->
</SCRIPT>
<!-- <div style='visibility:hidden;text-align:left;background:#ccc;border:1px solid black;position: absolute;left:100;z-index:1;padding:5;' id='out_box'></div> -->
</head>
<body>
<form name=config>
<H1>Slurm Version @SLURM_MAJOR@.@SLURM_MINOR@ Configuration Tool</H1>
<P>This form can be used to create a Slurm configuration file,
giving you control over many of the important configuration parameters.</P>

<p>This is the full version of the Slurm configuration tool. This version
has all the configuration options to create a Slurm configuration file. There
is a simplified version of the Slurm configuration tool available at
<a href="configurator.easy.html">configurator.easy.html</a>.</p>

<P><B>This tool supports Slurm version @SLURM_MAJOR@.@SLURM_MINOR@ only.</B>
Configuration files for other versions of Slurm should be built
using the tool distributed with that version in <i>doc/html/configurator.html</i>.
Some parameters will be set to default values, but you can
manually edit the resulting <I>slurm.conf</I> as desired
for greater flexibility. See <I>man slurm.conf</I> for more
details about the configuration parameters.</P>

<P>Note that while Slurm daemons create log files and other files as needed,
they treat the lack of parent directories as a fatal error.
This prevents the daemons from running if critical file systems are
not mounted and will minimize the risk of cold-starting (starting
without preserving jobs).</P>

<P>Note that this configuration file must be installed on all nodes
in your cluster.</P>

<P>After you have filled in the fields of interest, use the
"Submit" button at the bottom of the page to build the <I>slurm.conf</I>
file. It will appear in your web browser. Save the file in text format
as <I>slurm.conf</I> for use by Slurm.</P>

<P>For more information about Slurm, see
<A HREF="https://slurm.schedmd.com/slurm.html">https://slurm.schedmd.com/slurm.html</A></P>

<h2>Cluster Name</h2>
<input type="text" name="cluster_name" value="cluster"> <b>ClusterName</b>:
The name of your cluster. Using different names for each of your clusters is
important when using a single database to record information from multiple
Slurm-managed clusters.

<H2>Control Machines</H2>
Define the hostnames of the computers on which the Slurm controller and
optional backup controller will execute.
Hostname values should not be the fully qualified domain
name (e.g. use <I>tux</I> rather than <I>tux.abc.com</I>).
<P>
<input type="text" name="control_machine" value="linux0"> <B>SlurmctldHost</B>:
Primary controller hostname
<P>
<input type="text" name="backup_controller"> <B>SlurmctldHost</B> (backup):
Backup controller hostname (optional)
<P>

<H2>Compute Machines</H2>
Define the machines on which user applications can run.
You can also specify addresses of these computers if desired
(defaults to their hostnames).
Only a few of the possible parameters associated with the nodes will
be set by this tool, but many others are available.
Executing the command <i>slurmd -C</i> on each compute node will print its
physical configuration (sockets, cores, real memory size, etc.), which
can be used in constructing the <i>slurm.conf</i> file.
All of the nodes will be placed into a single partition (or queue)
with global access. Many options are available to group nodes into
partitions with a wide variety of configuration parameters.
Manually edit the <i>slurm.conf</i> produced to exercise these options.
Node names and addresses may be specified using a numeric range specification.
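<P>
For illustration only (the hostname pattern and hardware counts below are
placeholders, not recommendations), a node definition using a numeric range
might appear in <i>slurm.conf</i> as:
<PRE>
NodeName=linux[1-32] CPUs=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=1 RealMemory=16000 State=UNKNOWN
</PRE>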

<P>
<input type="text" name="node_name" value="linux[1-32]"> <B>NodeName</B>:
Compute nodes
<P>
<input type="text" name="node_addr"> <B>NodeAddr</B>: Compute node addresses
(optional)
<P>
<input type="text" name="partition_name" value="debug"> <B>PartitionName</B>:
Name of the one partition to be created
<P>
<input type="text" name="max_time" value="INFINITE"> <B>MaxTime</B>:
Maximum time limit of jobs in minutes or INFINITE
<P>
The following parameters describe a node's configuration.
Set a value for <B>CPUs</B>.
The other parameters are optional, but provide more control over scheduled resources:
<P>
<input type="text" name="procs" value="1"> <B>CPUs</B>: Count of processors
on each compute node.
If CPUs is omitted, it will be inferred from:
Sockets, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="sockets" value="">
<B>Sockets</B>:
Number of physical processor sockets/chips on the node.
If Sockets is omitted, it will be inferred from:
CPUs, CoresPerSocket, and ThreadsPerCore.
<P>
<input type="text" name="cores_per_socket" value="">
<B>CoresPerSocket</B>:
Number of cores in a single physical processor socket.
The CoresPerSocket value describes physical cores, not
the logical number of processors per socket.
<P>
<input type="text" name="threads_per_core" value="">
<B>ThreadsPerCore</B>:
Number of logical threads in a single physical core.
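<P>
As a worked example (the counts are hypothetical), these three values
multiply together to give the logical CPU count: a node with
<i>Sockets=2 CoresPerSocket=8 ThreadsPerCore=2</i> has
CPUs = 2 &times; 8 &times; 2 = 32.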
<P>
<input type="text" name="memory" value=""> <B>RealMemory</B>: Amount
of real memory (in megabytes). This parameter is required when specifying
Memory as a consumable resource with the select/cons_tres plugin. See below
under Resource Selection.
<P>

<H2>Slurm User</H2>
The Slurm controller (slurmctld) can run without elevated privileges,
so it is recommended that a user "slurm" be created for it. For testing
purposes, any user name can be used.
<P>
<input type="text" name="slurm_user" value="slurm"> <B>SlurmUser</B>
<P>


<H2>Slurm Port Numbers</H2>
The Slurm controller (slurmctld) requires a unique port for communications,
as do the Slurm compute node daemons (slurmd). If not set, the Slurm ports
are determined by checking for an entry in <I>/etc/services</I>, falling
back to internal defaults set at Slurm build time.
<P>
<input type="text" name="slurmctld_port" value="6817"> <B>SlurmctldPort</B>
<P>
<input type="text" name="slurmd_port" value="6818"> <B>SlurmdPort</B>
<P>

<H2>State Preservation</H2>
Define the location of a directory where the slurmctld daemon saves its state.
This should be a fully qualified pathname which can be read from and written to
by the Slurm user on both the control machine and backup controller (if configured).
The location of a directory where slurmd saves state should also be defined.
This must be a unique directory on each compute server (local disk).
The use of a highly reliable file system (e.g. RAID) is recommended.
<P>
<input type="text" name="state_save_location" value="/var/spool/slurmctld">
<b>StateSaveLocation</b>: Slurmctld state save directory.
<b>Must be writable by all SlurmctldHost nodes.</b>
<P>
<input type="text" name="slurmd_spool_dir" value="/var/spool/slurmd">
<b>SlurmdSpoolDir</b>: Slurmd state save directory
<P>
Define when a non-responding (DOWN) node is returned to service.<BR>
Select one value for <B>ReturnToService</B>:<BR>
<input type="radio" name="return_to_service" value="0">
<B>0</B>: When explicitly restored to service by an administrator.<BR>
<input type="radio" name="return_to_service" value="1" checked>
<B>1</B>: Upon registration with a valid configuration, only if it was set DOWN
due to being non-responsive.<BR>
<input type="radio" name="return_to_service" value="2">
<B>2</B>: Upon registration with a valid configuration.<BR>
<P>

<H2>Scheduling</H2>
Define the mechanism to be used for controlling job ordering.<BR>
Select one value for <B>SchedulerType</B>:<BR>
<input type="radio" name="sched_type" value="backfill" checked> <B>Backfill</B>:
FIFO with backfill<BR>
<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In
First-Out (FIFO)<BR>
<P>

<H2>Interconnect</H2>
Define the node interconnect used.<BR>
Select one value for <B>SwitchType</B>:<BR>
<input type="radio" name="switch_type" value="switch/hpe_slingshot"> <B>HPE
Slingshot</B>: HPE Slingshot proprietary interconnect<BR>
<input type="radio" name="switch_type" value="" checked> <B>None</B>: No special
interconnect handling required<BR>
<P>

<H2>Default MPI Type</H2>
Specify the type of MPI to be used by default. Slurm will configure environment
variables accordingly. Users can override this specification with an srun option.<BR>
Select one value for <B>MpiDefault</B>:<BR>
<input type="radio" name="mpi_default" value="mpi/pmi2"> <B>MPI-PMI2</B>
(For PMI2-supporting MPI implementations)<BR>
<input type="radio" name="mpi_default" value="mpi/pmix"> <B>MPI-PMIx</B>
(Exascale PMI implementation)<BR>
<input type="radio" name="mpi_default" value="" checked> <B>None</B>:
This works for most other MPI types.<BR>
<P>

<H2>Process Tracking</H2>
Define the algorithm used to identify which processes are associated with a
given job. This is used to signal, kill, and account for the processes
associated with a job step.<BR>
Select one value for <B>ProctrackType</B>:<BR>
<input type="radio" name="proctrack_type" value="cgroup" checked> <B>Cgroup</B>: Use
Linux <i>cgroup</i> to create a job container and track processes.
Build a <i>cgroup.conf</i> file as well (a minimal example follows this list)<BR>
<input type="radio" name="proctrack_type" value="linuxproc"> <B>LinuxProc</B>: Use
parent process ID records; processes can escape from Slurm control<BR>
<input type="radio" name="proctrack_type" value="pgid"> <B>Pgid</B>: Use Unix
Process Group ID; processes changing their process group ID can escape from Slurm
control<BR>
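<P>
A minimal <i>cgroup.conf</i> might look like the following. This is an
illustrative sketch, not a complete or recommended configuration; see the
cgroup.conf man page for the full set of options:
<PRE>
###
# Minimal example cgroup.conf (illustrative)
###
ConstrainCores=yes
ConstrainRAMSpace=yes
</PRE>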
<P>

<H2>Resource Selection</H2>
Define the resource (node) selection algorithm to be used.<BR>
Select one value for <B>SelectType</B>:<BR>
<input type="radio" name="select_type" value="cons_tres" checked>
<b>cons_tres</b>: Allocate individual processors, memory, GPUs, and other
trackable resources<BR>
<input type="radio" name="select_type" value="linear">
<B>Linear</B>: Node-based
resource allocation; does not manage individual processor allocation<BR>
<P>

<H2>Task Launch</H2>
Define a task launch plugin. This may be used to
provide resource management within a node (e.g. pinning
tasks to specific processors).
Select one or more values for <B>TaskPlugin</B>:<BR>
<input type="checkbox" name="task_plugin" value="task/none"> <B>None</B>: No task launch actions<BR>
<input type="checkbox" name="task_plugin" value="task/affinity" checked> <B>Affinity</B>:
CPU affinity support
(see the srun man page for the --cpu-bind and --mem-bind options)<BR>
<input type="checkbox" name="task_plugin" value="task/cgroup" checked> <B>Cgroup</B>:
Enforcement of allocated resource constraints using Linux Control Groups
(see the cgroup.conf man page)<BR>
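<P>
The checked values are joined with commas, so with the default boxes checked
the generated line is:
<PRE>
TaskPlugin=task/affinity,task/cgroup
</PRE>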
<P>

<H2>Prolog and Epilog</H2>
<P>
<B>Prolog/Epilog</B>: Fully qualified path of a program to be executed as root
on every node of a user's job, before the job's tasks are initiated there
and after the job has terminated.
These parameters are optional.
<DL>
<DT> <input type="text" name="prolog" value="" size=40> <B>Prolog</B>
<DT> <input type="text" name="epilog" value="" size=40> <B>Epilog</B>
</DL>
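<P>
For example (the script paths here are hypothetical), the generated
<i>slurm.conf</i> lines might read:
<PRE>
Prolog=/etc/slurm/prolog.sh
Epilog=/etc/slurm/epilog.sh
</PRE>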

<P>
<B>SrunProlog/Epilog</B>: Fully qualified path to be executed by srun at
job step initiation and termination. These parameters may be overridden by
srun's --prolog and --epilog options.
These parameters are optional.
<DL>
<DT> <input type="text" name="srun_prolog" value="" size=40> <B>SrunProlog</B>
<DT> <input type="text" name="srun_epilog" value="" size=40> <B>SrunEpilog</B>
</DL>

<P>
<B>TaskProlog/Epilog</B>: Fully qualified path to be executed as the user
before each task begins execution and after each task terminates.
These parameters are optional.
<DL>
<DT> <input type="text" name="task_prolog" value="" size=40> <B>TaskProlog</B>
<DT> <input type="text" name="task_epilog" value="" size=40> <B>TaskEpilog</B>
</DL>

<H2>Event Logging</H2>
Slurmctld and slurmd daemons can each be configured with different
levels of logging verbosity, ranging from quiet to debug5 (extremely verbose).
Each may also be configured to write its log to a file. Use fully qualified
pathnames for the files.
<P>
<input type="text" name="slurmctld_debug" value="info"> <b>SlurmctldDebug</b>
(default is info)
<P>
<input type="text" name="slurmctld_logfile" value="/var/log/slurmctld.log">
<b>SlurmctldLogFile</b> (if empty, log goes to syslog)
<P>
<input type="text" name="slurmd_debug" value="info"> <b>SlurmdDebug</b>
(default is info)
<P>
<input type="text" name="slurmd_logfile" value="/var/log/slurmd.log">
<b>SlurmdLogFile</b> (if empty, log goes to syslog. The string "%h" in the name
is replaced with the hostname)
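For example, <i>/var/log/slurmd.%h.log</i> produces a separate log file
per node, such as <i>/var/log/slurmd.linux1.log</i> on node linux1.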
<P>

<H2>Job Completion Logging</H2>
Define the job completion logging mechanism to be used. Defaults to None.
Select one value for <B>JobCompType</B>:<BR>
<input type="radio" name="job_comp_type" value="none" checked> <B>None</B>:
No job completion logging<BR>
<input type="radio" name="job_comp_type" value="elasticsearch"> <B>Elasticsearch</B>:
Write job completion info to an Elasticsearch server<BR>
<input type="radio" name="job_comp_type" value="filetxt"> <B>FileTxt</B>:
Write job completion status to a text file<BR>
<input type="radio" name="job_comp_type" value="kafka"> <b>Kafka</b>:
Write job completion info to a Kafka server<br>
<input type="radio" name="job_comp_type" value="lua"> <B>Lua</B>:
Use a script called jobcomp.lua to log job completion<BR>
<input type="radio" name="job_comp_type" value="script"> <B>Script</B>:
Use an arbitrary script to log job completion<BR>
<input type="radio" name="job_comp_type" value="mysql"> <B>MySQL</B>:
Write completion status to a MySQL or MariaDB database<BR>
<P>
<input type="text" name="job_comp_loc" value=""> <B>JobCompLoc</B>:
The location of the text file to be written to (if JobCompType=filetxt),
the script to be run (if JobCompType=script), the URL of the
Elasticsearch server (if JobCompType=elasticsearch), the file containing
librdkafka parameters (if JobCompType=kafka), or the database name
(for other values of JobCompType).
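<P>
For example (the log file path is illustrative), selecting FileTxt and
naming a log file would produce:
<PRE>
JobCompType=jobcomp/filetxt
JobCompLoc=/var/log/slurm_jobcomp.log
</PRE>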
<p><b>The options below are for use with a database, to specify where the
database is running and how to connect to it.</b><br>
<input type="text" name="job_comp_host" value=""> <B>JobCompHost</B>:
Host on which the job completion database is running<br>
<input type="text" name="job_comp_port" value=""> <B>JobCompPort</B>:
Port on which the database server is listening<br>
<input type="text" name="job_comp_user" value=""> <B>JobCompUser</B>:
User name used to connect to the database<br>
<input type="text" name="job_comp_params" value=""> <B>JobCompParams</B>:
Arbitrary text string passed to the job completion plugin<br>
<input type="text" name="job_comp_pass" value=""> <B>JobCompPass</B>:
Password used to connect to the database<br>
<P>

<H2>Job Accounting Gather</H2>
Slurm accounts for resource use per job. The mechanism used to gather
this data depends on the system type.<BR>
Select one value for <B>JobAcctGatherType</B>:<BR>
<input type="radio" name="job_acct_gather_type" value="" checked> <B>None</B>: No
job accounting<BR>
<input type="radio" name="job_acct_gather_type" value="jobacct_gather/cgroup"> <b>cgroup</b>:
Linux cgroup information gathered; use with Linux systems only<br>
<input type="radio" name="job_acct_gather_type" value="jobacct_gather/linux"> <B>Linux</B>:
Linux process table information gathered; use with Linux systems only<BR>
<P>
<input type="text" name="job_acct_gather_frequency" value="30"> <B>JobAcctGatherFrequency</B>:
Polling interval in seconds. Zero disables periodic sampling.<BR>
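<P>
For example, with cgroup-based gathering selected and the default polling
interval shown above, the generated lines would be:
<PRE>
JobAcctGatherType=jobacct_gather/cgroup
JobAcctGatherFrequency=30
</PRE>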
<P>

<H2>Job Accounting Storage</H2>
Used together with Job Accounting Gather, Slurm can store the accounting
information in several ways. Fill in your system's choice here.<BR>
Select one value for <B>AccountingStorageType</B>:<BR>
<input type="radio" name="accounting_storage_type" value="" checked> <B>None</B>:
No job accounting storage<BR>
<input type="radio" name="accounting_storage_type" value="accounting_storage/slurmdbd"> <B>SlurmDBD</B>:
Write job accounting to SlurmDBD (database daemon), which can securely
save the data from many Slurm-managed clusters into a common database<BR>
<p><b>The options below are for use with a database, to specify where the
database is running and how to connect to it.</b><br>
<input type="text" name="accounting_storage_host" value=""> <B>AccountingStorageHost</B>:
Host on which the accounting database is running<br>
<input type="text" name="accounting_storage_port" value=""> <B>AccountingStoragePort</B>:
Port on which the database server is listening<br>
<input type="text" name="acctng_store_flags" value=""> <B>AccountingStoreFlags</B>:
Comma separated list. Options are:<br>
'job_comment' - store the job comment field in the database;<br>
'job_env' - store a batch job's environment in the database;<br>
'job_extra' - store a batch job's extra field in the database;<br>
'job_script' - store the job batch script in the database.<br>
<P>
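For example (the hostname is a placeholder; 6819 is the conventional
slurmdbd port), a SlurmDBD configuration might generate:
<PRE>
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=dbhost
AccountingStoragePort=6819
</PRE>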

<H2>Process ID Logging</H2>
Define the locations where the daemons record their process IDs.
This is used to locate the appropriate daemon for signaling.
Specify fully qualified pathnames for the files.
<P>
<input type="text" name="slurmctld_pid_file" value="/var/run/slurmctld.pid">
<B>SlurmctldPidFile</B>
<P>
<input type="text" name="slurmd_pid_file" value="/var/run/slurmd.pid">
<B>SlurmdPidFile</B>
<P>

<H2>Timers</H2>
Slurm has a variety of timers to control when to consider a node DOWN,
when to purge job records, how long to give a job to gracefully terminate, etc.
<P>
<input type="text" name="slurmctld_timeout" value="120">
<B>SlurmctldTimeout</B>: How many seconds the backup controller waits before
becoming the active controller
<P>
<input type="text" name="slurmd_timeout" value="300">
<B>SlurmdTimeout</B>: How many seconds the Slurm controller waits for the slurmd
to respond to a request before considering the node DOWN
<P>
<input type="text" name="inactive_limit" value="0">
<B>InactiveLimit</B>: How many seconds the Slurm controller waits for srun
commands to respond before considering the job or job step inactive and
terminating it. A value of zero indicates unlimited wait
<P>
<input type="text" name="min_job_age" value="300">
<B>MinJobAge</B>: How many seconds the Slurm controller waits after a
job terminates before purging its record. A record of the job will
persist in job completion and/or accounting records indefinitely,
but will no longer be visible with the squeue command after purging
<P>
<input type="text" name="kill_wait" value="30">
<B>KillWait</B>: How many seconds a job is given to gracefully terminate
after reaching its time limit and being sent SIGTERM before sending
a SIGKILL
<P>
<input type="text" name="wait_time" value="0">
<B>WaitTime</B>: How many seconds after a job step's first task terminates
before terminating all remaining tasks. A value of zero indicates unlimited wait
<P>

<BR>
<BR>
<input type=button value="Submit" onClick="javascript:displayfile()">
<input type=reset value="Reset Form">
</form>
<hr>
<p>
<a href="disclaimer.html" target="_blank" class="privacy">Legal Notices</a><br>
Last modified 24 June 2025</p>
</body>
</html>