| <!-- |
| Copyright (C) 2005-2007 The Regents of the University of California. |
| Copyright (C) 2008-2009 Lawrence Livermore National Security. |
| Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| Written by Morris Jette <jette1@llnl.gov> and Danny Auble <da@llnl.gov> |
| |
| This file is part of SLURM, a resource management program. |
| For details, see <https://computing.llnl.gov/linux/slurm/>. |
| |
| SLURM is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 2 of the License, or (at your option) |
| any later version. |
| |
| SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| details. |
| |
| You should have received a copy of the GNU General Public License along |
| with SLURM; if not, write to the Free Software Foundation, Inc., |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| --> |
| <HTML> |
| <HEAD><TITLE>SLURM System Configuration Tool</TITLE> |
| <SCRIPT type="text/javascript"> |
| <!-- |
// Format one slurm.conf line for an optional text input.
//   name: configuration keyword to emit
//   form: object whose "value" property holds the user's entry
// Returns "name=value" when a value was entered; otherwise the keyword is
// emitted commented out and empty ("#name=").
function get_field(name, form)
{
	var entered = form.value;
	return entered ? name + "=" + entered : "#" + name + "=";
}
| |
// Variant of get_field for values appended to an existing line.
//   name: text to emit in front of "=" (callers include any leading blank)
//   form: object whose "value" property holds the user's entry
// Returns "name=value" when a value was entered, or an empty string so that
// nothing at all is added to the line.
function get_field2(name, form)
{
	var entered = form.value;
	return entered ? name + "=" + entered : "";
}
| |
// Pick the accounting storage plugin name, reconciled with the selected
// job accounting gather type.
//   gather:       value of the checked job_acct_gather_type radio button
//   form_storage: list of accounting_storage_type radio buttons
// For the first checked storage button:
//   - storage "none" while gathering is enabled   -> "filetxt"
//   - storage enabled while gathering is "none"   -> "none"
//   - otherwise                                   -> the button's own value
// Returns undefined when no storage button is checked.
function get_accounting_storage_type_field(gather, form_storage)
{
	var gather_off = (gather == "none");

	for (var idx = 0; idx < form_storage.length; idx++) {
		var option = form_storage[idx];
		if (!option.checked)
			continue;

		var storage_off = (option.value == "none");
		if (storage_off && !gather_off)
			return "filetxt";
		if (!storage_off && gather_off)
			return "none";
		return option.value;
	}
}
| |
// Format a slurm.conf line from a radio group, ignoring its first button.
//   name: configuration keyword to emit
//   form: list of radio buttons
// The scan starts at index 1, so a checked first button is treated the same
// as no selection. Returns "name=value" for the first checked button found,
// otherwise the commented-out placeholder "#name=".
function get_radio_field_skipfirst(name, form)
{
	var idx = 1;

	while (idx < form.length) {
		var button = form[idx];
		if (button.checked)
			return name + "=" + button.value;
		idx += 1;
	}
	return "#" + name + "=";
}
| |
// Return the value of the first checked button in a radio group.
//   form: list of radio buttons
// Returns undefined when no button is checked.
function get_radio_value(form)
{
	var idx = 0;

	while (idx < form.length) {
		var button = form[idx];
		if (button.checked)
			return button.value;
		idx += 1;
	}
}
| |
// Keep SelectType in step with SelectTypeParameters: when a parameter radio
// button is chosen, click the select_type button its group belongs to.
//   form: the clicked element; only its "name" property is examined
// Names other than the ones listed below are ignored.
function set_select_type(form)
{
	// Position in this list == index of the matching select_type button
	// (0: cons_res, 1: linear, 2: bluegene).
	var param_groups = ["cons_res_params", "linear_params", "bluegene_params"];

	for (var slot = 0; slot < param_groups.length; slot++) {
		if (form.name == param_groups[slot]) {
			document.config.select_type[slot].click();
			return;
		}
	}
}
| |
// Build the SelectTypeParameters line that corresponds to the checked
// select_type radio button.
// Only cons_res (select_type[0]) has parameters in this form; its value is
// taken from the cons_res_params radio group, whose first (default) button
// yields the commented-out placeholder.
// Every other case -- linear, bluegene, or no button checked at all --
// returns the commented-out placeholder, so the generated file never
// contains the text "undefined".
function get_select_type_params()
{
	var select_type = document.config.select_type;

	if (select_type[0].checked) {		// cons_res
		return get_radio_field_skipfirst("SelectTypeParameters",
						 document.config.cons_res_params);
	}
	return "#SelectTypeParameters=";
}
| |
// Build the TaskPluginParam line that corresponds to the checked
// task_plugin radio button.
// When the checked plugin is "affinity" the line carries the value of the
// task_plugin_param radio group; in every other case the commented-out
// placeholder is returned.
function get_task_plugin_param()
{
	var plugins = document.config.task_plugin;
	var idx = 0;

	while (idx < plugins.length) {
		var plugin = plugins[idx];
		if (plugin.checked && plugin.value == "affinity") {
			var param = get_radio_value(document.config.task_plugin_param);
			return "TaskPluginParam=" + param;
		}
		idx += 1;
	}
	return "#TaskPluginParam=";
}
| |
// Hide the element with id 'out_box' by setting its CSS visibility.
// Note: the only 'out_box' element in this page's markup is currently
// commented out, as is the popup code in displayfile() that links to
// this function.
function hide_box()
{
	document.getElementById('out_box').style.visibility = 'hidden';
}
| |
// Generate the slurm.conf text from the "config" form and write it to the
// document with document.open()/write()/close().
//
// Each output line ends in "<br>". Keywords backed by a form field are
// emitted with the field's value; optional text fields go through
// get_field() and are commented out when empty. Keywords the form does not
// cover are emitted as commented-out placeholders.
//
// Fixes relative to the previous revision:
//   - the placeholder following Epilog is "#EpilogSlurmctld=" (it used to be
//     a second copy of "#PrologSlurmctld=");
//   - "#UnkillableStepProgram=" and "#UnkillableStepTimeout=" are each
//     emitted once (program in the general section, timeout under TIMERS)
//     instead of twice.
//
// NOTE(review): field values are concatenated into the markup unescaped, so
// a value containing "<" or "&" will not be displayed literally.
function displayfile()
{
	var cfg = document.config;

	var printme = "# slurm.conf file generated by configurator.html.<br>" +
	"# Put this file on all nodes of your cluster.<br>" +
	"# See the slurm.conf man page for more information.<br>" +
	"#<br>" +
	"ControlMachine=" + cfg.control_machine.value + "<br>" +
	get_field("ControlAddr",cfg.control_addr) + "<br>" +
	get_field("BackupController",cfg.backup_controller) + "<br>" +
	get_field("BackupAddr",cfg.backup_addr) + "<br>" +
	"# <br>" +
	"AuthType=auth/" + get_radio_value(cfg.auth_type) + "<br>" +
	"CacheGroups=" + get_radio_value(cfg.cache_groups) + "<br>" +
	"#CheckpointType=checkpoint/none <br>" +
	"CryptoType=crypto/" + get_radio_value(cfg.crypto_type) + "<br>" +
	"#DisableRootJobs=NO <br>" +
	"#EnforcePartLimits=NO <br>" +
	get_field("Epilog",cfg.epilog) + "<br>" +
	"#EpilogSlurmctld= <br>" +
	"#FirstJobId=1 <br>" +
	"#JobCheckpointDir=/var/slurm/checkpoint <br>" +
	get_field("JobCredentialPrivateKey", cfg.private_key) + "<br>" +
	get_field("JobCredentialPublicCertificate", cfg.public_key) + "<br>" +
	"#JobFileAppend=0 <br>" +
	"#JobRequeue=1 <br>" +
	"#KillOnBadExit=0 <br>" +
	"#Licenses=foo*4,bar <br>" +
	"#MailProg=/bin/mail <br>" +
	"#MaxJobCount=5000 <br>" +
	"MpiDefault=" + get_radio_value(cfg.mpi_default) + "<br>" +
	"#MpiParams=ports=#-# <br>" +
	"#PluginDir= <br>" +
	"#PlugStackConfig= <br>" +
	"#PrivateData=jobs <br>" +
	"ProctrackType=proctrack/" + get_radio_value(cfg.proctrack_type) + "<br>" +
	get_field("Prolog",cfg.prolog) + "<br>" +
	"#PrologSlurmctld= <br>" +
	"#PropagatePrioProcess=0 <br>" +
	"#PropagateResourceLimits= <br>" +
	"#PropagateResourceLimitsExcept= <br>" +
	"ReturnToService=" + get_radio_value(cfg.return_to_service) + "<br>" +
	"#SallocDefaultCommand= <br>" +
	"SlurmctldPidFile=" + cfg.slurmctld_pid_file.value + "<br>" +
	"SlurmctldPort=" + cfg.slurmctld_port.value + "<br>" +
	"SlurmdPidFile=" + cfg.slurmd_pid_file.value + "<br>" +
	"SlurmdPort=" + cfg.slurmd_port.value + "<br>" +
	"SlurmdSpoolDir=" + cfg.slurmd_spool_dir.value + "<br>" +
	"SlurmUser=" + cfg.slurm_user.value + "<br>" +
	get_field("SrunEpilog",cfg.srun_epilog) + "<br>" +
	get_field("SrunProlog",cfg.srun_prolog) + "<br>" +
	"StateSaveLocation=" + cfg.state_save_location.value + "<br>" +
	"SwitchType=switch/" + get_radio_value(cfg.switch_type) + "<br>" +
	get_field("TaskEpilog",cfg.task_epilog) + "<br>" +
	"TaskPlugin=task/" + get_radio_value(cfg.task_plugin) + "<br>" +
	get_task_plugin_param() + "<br>" +
	get_field("TaskProlog",cfg.task_prolog) + "<br>" +
	"#TopologyPlugin=topology/tree <br>" +
	"#TmpFs=/tmp <br>" +
	"#TrackWCKey=no <br>" +
	"#TreeWidth= <br>" +
	"#UnkillableStepProgram= <br>" +
	"#UsePAM=0 <br>" +
	"# <br>" +
	"# <br>" +
	"# TIMERS <br>" +
	"#BatchStartTimeout=10 <br>" +
	"#CompleteWait=0 <br>" +
	"#EpilogMsgTime=2000 <br>" +
	"#GetEnvTimeout=2 <br>" +
	"#HealthCheckInterval=0 <br>" +
	"#HealthCheckProgram= <br>" +
	"InactiveLimit=" + cfg.inactive_limit.value + "<br>" +
	"KillWait=" + cfg.kill_wait.value + "<br>" +
	"#MessageTimeout=10 <br>" +
	"#ResvOverRun=0 <br>" +
	"MinJobAge=" + cfg.min_job_age.value + "<br>" +
	"#OverTimeLimit=0 <br>" +
	"SlurmctldTimeout=" + cfg.slurmctld_timeout.value + "<br>" +
	"SlurmdTimeout=" + cfg.slurmd_timeout.value + "<br>" +
	"#UnkillableStepTimeout=60 <br>" +
	"Waittime=" + cfg.wait_time.value + "<br>" +
	"# <br>" +
	"# <br>" +
	"# SCHEDULING <br>" +
	"#DefMemPerCPU=0 <br>" +
	"FastSchedule=" + get_radio_value(cfg.fast_schedule) + "<br>" +
	"#MaxMemPerCPU=0 <br>" +
	"#SchedulerRootFilter=1 <br>" +
	"#SchedulerTimeSlice=30 <br>" +
	"SchedulerType=sched/" + get_radio_value(cfg.sched_type) + "<br>" +
	get_field("SchedulerPort",cfg.scheduler_port) + "<br>" +
	"SelectType=select/" + get_radio_value(cfg.select_type) + "<br>" +
	get_select_type_params() + "<br>" +
	"# <br>" +
	"# <br>" +
	"# JOB PRIORITY <br>" +
	"#PriorityType=priority/basic <br>" +
	"#PriorityDecayHalfLife= <br>" +
	"#PriorityFavorSmall= <br>" +
	"#PriorityMaxAge= <br>" +
	"#PriorityUsageResetPeriod= <br>" +
	"#PriorityWeightAge= <br>" +
	"#PriorityWeightFairshare= <br>" +
	"#PriorityWeightJobSize= <br>" +
	"#PriorityWeightPartition= <br>" +
	"#PriorityWeightQOS= <br>" +
	"# <br>" +
	"# <br>" +
	"# LOGGING AND ACCOUNTING <br>" +
	"#AccountingStorageEnforce=0 <br>" +
	get_field("AccountingStorageHost",cfg.accounting_storage_host) + "<br>" +
	get_field("AccountingStorageLoc",cfg.accounting_storage_loc) + "<br>" +
	get_field("AccountingStoragePass",cfg.accounting_storage_pass) + "<br>" +
	get_field("AccountingStoragePort",cfg.accounting_storage_port) + "<br>" +
	// The storage type is adjusted to stay consistent with the gather type.
	"AccountingStorageType=accounting_storage/" +
		get_accounting_storage_type_field(get_radio_value(cfg.job_acct_gather_type),
						  cfg.accounting_storage_type) + "<br>" +
	get_field("AccountingStorageUser",cfg.accounting_storage_user) + "<br>" +
	get_field("ClusterName",cfg.cluster_name) + "<br>" +
	"#DebugFlags= <br>" +
	get_field("JobCompHost",cfg.job_comp_host) + "<br>" +
	get_field("JobCompLoc",cfg.job_comp_loc) + "<br>" +
	get_field("JobCompPass",cfg.job_comp_pass) + "<br>" +
	get_field("JobCompPort",cfg.job_comp_port) + "<br>" +
	"JobCompType=jobcomp/" + get_radio_value(cfg.job_comp_type) + "<br>" +
	get_field("JobCompUser",cfg.job_comp_user) + "<br>" +
	get_field("JobAcctGatherFrequency",cfg.job_acct_gather_frequency) + "<br>" +
	"JobAcctGatherType=jobacct_gather/" + get_radio_value(cfg.job_acct_gather_type) + "<br>" +
	"SlurmctldDebug=" + cfg.slurmctld_debug.value + "<br>" +
	get_field("SlurmctldLogFile",cfg.slurmctld_logfile) + "<br>" +
	"SlurmdDebug=" + cfg.slurmd_debug.value + "<br>" +
	get_field("SlurmdLogFile",cfg.slurmd_logfile) + "<br>" +
	"# <br>" +
	"# <br>" +
	"# POWER SAVE SUPPORT FOR IDLE NODES (optional) <br>" +
	"#SuspendProgram= <br>" +
	"#ResumeProgram= <br>" +
	"#SuspendTimeout= <br>" +
	"#ResumeTimeout= <br>" +
	"#ResumeRate= <br>" +
	"#SuspendExcNodes= <br>" +
	"#SuspendExcParts= <br>" +
	"#SuspendRate= <br>" +
	"#SuspendTime= <br>" +
	"# <br>" +
	"# <br>" +
	"# COMPUTE NODES <br>" +
	// Node definition: optional attributes are omitted entirely when empty.
	"NodeName=" + cfg.node_name.value +
	get_field2(" NodeAddr",cfg.node_addr) +
	get_field2(" Procs",cfg.procs) +
	get_field2(" RealMemory",cfg.memory) +
	get_field2(" Sockets",cfg.sockets) +
	get_field2(" CoresPerSocket",cfg.cores_per_socket) +
	get_field2(" ThreadsPerCore",cfg.threads_per_core) +
	" State=UNKNOWN <br>" +
	// A single partition containing every node defined above.
	"PartitionName=" + cfg.partition_name.value +
	" Nodes=" + cfg.node_name.value +
	" Default=YES"+
	" MaxTime=" + cfg.max_time.value +
	" State=UP";

	// Disabled alternative: show the result in the 'out_box' popup.
	//scroll(0,0);
	//var popup = document.getElementById('out_box');

	//popup.innerHTML = "<a href='javascript:hide_box();'>close</a><br>";
	//popup.innerHTML += "#BEGIN SLURM.CONF FILE<br><br>";
	//popup.innerHTML += printme;
	//popup.innerHTML += "<br><br>#END SLURM.CONF FILE<br>";
	//popup.innerHTML += "<a href='javascript:hide_box();'>close</a>";

	//popup.style.visibility = 'visible';

	// OLD CODE
	document.open();
	document.write(printme);
	document.close();
}
| |
| --> |
| </SCRIPT> |
| <!-- <div style='visibility:hidden;text-align:left;background:#ccc;border:1px solid black;position: absolute;left:100;z-index:1;padding:5;' id='out_box'></div> --> |
| </HEAD> |
| <BODY> |
| <FORM name=config> |
| <H1>SLURM Version @SLURM_MAJOR@.@SLURM_MINOR@ Configuration Tool</H1> |
| <P>This form can be used to create a SLURM configuration file with |
| you controlling many of the important configuration parameters.</P> |
| |
| <P><B>This tool supports SLURM version @SLURM_MAJOR@.@SLURM_MINOR@ only.</B> |
| Configuration files for other versions of SLURM should be built |
| using the tool distributed with it in <i>doc/html/configurator.html</i>. |
| Some parameters will be set to default values, but you can |
| manually edit the resulting <I>slurm.conf</I> as desired |
| for greater flexibility. See <I>man slurm.conf</I> for more |
| details about the configuration parameters.</P> |
| |
| <P>Note that while SLURM daemons create log files and other files as needed, |
| it treats the lack of parent directories as a fatal error. |
| This prevents the daemons from running if critical file systems are |
| not mounted and will minimize the risk of cold-starting (starting |
| without preserving jobs).</P> |
| |
| <P>Note that this configuration file must be installed on all nodes |
| in your cluster.</P> |
| |
| <P>After you have filled in the fields of interest, use the |
| "Submit" button on the bottom of the page to build the <I>slurm.conf</I> |
| file. It will appear on your web browser. Save the file in text format |
| as <I>slurm.conf</I> for use by SLURM. |
| |
| <P>For more information about SLURM, see |
| <A HREF="https://computing.llnl.gov/linux/slurm/">https://computing.llnl.gov/linux/slurm/</A> |
| <P> |
| <A HREF="https://www.llnl.gov/disclaimer.html"><B>Privacy and legal notice</B></A> |
| |
| <H2>Control Machines</H2> |
| Define the hostname of the computer on which the SLURM controller and |
| optional backup controller will execute. You can also specify addresses |
| of these computers if desired (defaults to their hostnames). |
| The IP addresses can be either numeric IP addresses or names. |
| Hostname values should not be the fully qualified domain |
| name (e.g. use <I>linux</I> rather than <I>linux.llnl.gov</I>). |
| <P> |
| <input type="text" name="control_machine" value="linux0"> <B>ControlMachine</B>: |
| Master Controller Hostname |
| <P> |
| <input type="text" name="control_addr"> <B>ControlAddr</B>: Master Controller |
| Address (optional) |
| <P> |
| <input type="text" name="backup_controller"> <B>BackupController</B>: Backup |
| Controller Hostname (optional) |
| <P> |
| <input type="text" name="backup_addr"> <B>BackupAddr</B>: Backup Controller |
| Address (optional) |
| <P> |
| |
| <H2>Compute Machines</H2> |
| Define the machines on which user applications can run. |
| You can also specify addresses of these computers if desired |
| (defaults to their hostnames). |
| Only a few of the possible parameters associated with the nodes will |
| be set by this tool, but many others are available. |
| All of the nodes will be placed into a single partition (or queue) |
| with global access. Many options are available to group nodes into |
| partitions with a wide variety of configuration parameters. |
| Manually edit the <i>slurm.conf</i> produced to exercise these options. |
| Node names and addresses may be specified using a numeric range specification. |
| |
| <P> |
| <input type="text" name="node_name" value="linux[1-32]"> <B>NodeName</B>: |
| Compute nodes |
| <P> |
| <input type="text" name="node_addr"> <B>NodeAddr</B>: Compute node addresses |
| (optional) |
| <P> |
| <input type="text" name="partition_name" value="debug"> <B>PartitionName</B>: |
| Name of the one partition to be created |
| <P> |
| <input type="text" name="max_time" value="INFINITE"> <B>MaxTime</B>: |
| Maximum time limit of jobs in minutes or INFINITE |
| <P> |
| The following parameters describe a node's configuration. |
| Set a value for <B>Procs</B>. |
| The other parameters are optional, but provide more control over scheduled resources: |
| <P> |
| <input type="text" name="procs" value="1"> <B>Procs</B>: Count of processors |
| on each compute node. |
| If Procs is omitted, it will be inferred from: |
| Sockets, CoresPerSocket, and ThreadsPerCore. |
| <P> |
| <input type="text" name="sockets" value=""> |
| <B>Sockets</B>: |
| Number of physical processor sockets/chips on the node. |
| If Sockets is omitted, it will be inferred from: |
| Procs, CoresPerSocket, and ThreadsPerCore. |
| <P> |
| <input type="text" name="cores_per_socket" value=""> |
| <B>CoresPerSocket</B>: |
| Number of cores in a single physical processor socket. |
| The CoresPerSocket value describes physical cores, not |
| the logical number of processors per socket. |
| <P> |
| <input type="text" name="threads_per_core" value=""> |
| <B>ThreadsPerCore</B>: |
| Number of logical threads in a single physical core. |
| <P> |
| <input type="text" name="memory" value=""> <B>RealMemory</B>: Amount |
| of real memory. This parameter is required when specifying Memory as a |
| consumable resource with the select/cons_res plug-in. See below |
| under Resource Selection. |
| <P> |
| |
| <H2>SLURM User</H2> |
| The SLURM controller (slurmctld) can run without elevated privileges, |
| so it is recommended that a user "slurm" be created for it. For testing |
| purposes any user name can be used. |
| <P> |
| <input type="text" name="slurm_user" value="slurm"> <B>SlurmUser</B> |
| <P> |
| |
| <H2>Group ID Caching</H2> |
| |
| If you have a slow NIS environment, big parallel jobs take a long time |
| to start up (and may eventually time-out) because the NIS server(s) |
| may not be able to quickly respond to simultaneous requests from |
| multiple slurmd's. You can instruct slurmd to cache /etc/groups |
| entries to prevent this from happening by setting |
| <B>CacheGroups</B>=1. Reconfiguring ("scontrol reconfig") with |
| <B>CacheGroups</B>=0 will cause slurmd to purge the cache. Select one |
| value for <B>CacheGroups</B>:<BR> |
| <input type="radio" name="cache_groups" value="0" checked> |
| <B>0</B>: for normal environment.<BR> |
| <input type="radio" name="cache_groups" value="1"> |
| <B>1</B>: for slow NIS environment. |
| <P> |
| WARNING: The group ID cache does not try to keep itself in sync with |
| the system. You MUST run "scontrol reconfig" to update the cache |
| after making any changes to system password or group databases. |
| <P> |
| |
| <H2>SLURM Port Numbers</H2> |
| The SLURM controller (slurmctld) requires a unique port for communications |
| as do the SLURM compute node daemons (slurmd). If not set, slurm ports |
| are set by checking for an entry in <I>/etc/services</I> and if that |
| fails by using an internal default set at SLURM build time. |
| <P> |
| <input type="text" name="slurmctld_port" value="6817"> <B>SlurmctldPort</B> |
| <P> |
| <input type="text" name="slurmd_port" value="6818"> <B>SlurmdPort</B> |
| <P> |
| |
| <H2>Authentication and Security</H2> |
| Define the method used for authenticating communications between SLURM components.<BR> |
| Select one value for <B>AuthType</B>:<BR> |
| <input type="radio" name="auth_type" value="none"> <B>None</B>: No authentication, |
| not recommended for production use<br> |
| <input type="radio" name="auth_type" value="authd"> <B>Authd</B>: Brent Chun's |
| <A href="http://www.theether.org/authd/">authd</A><BR> |
| <input type="radio" name="auth_type" value="munge" checked> <B>Munge</B>: LLNL's |
| <A href="http://home.gna.org/munge/">Munge</A><BR> |
| <P> |
| Library used for job step cryptographic signature generation.<BR> |
| Select one value for <B>CryptoType</B>:<BR> |
| <input type="radio" name="crypto_type" value="munge" checked><B>Munge</B>: LLNL's |
| <A href="http://home.gna.org/munge/">Munge</A> (has Gnu Public License)<BR> |
| <input type="radio" name="crypto_type" value="openssl"> <B>OpenSSL</B>: |
| <A href="http://www.openssl.org/">OpenSSL</A> |
| <P> |
| Define the location of public and private keys used by SLURM's |
| cryptographic signature generation plugin (CryptoType).<br> |
| <b>These values are only used if CryptoType=OpenSSL.</b><br> |
| These files need to be generated by the SLURM administrator. |
| Specify fully qualified pathnames. |
| <P> |
| <input type="text" name="private_key"> <B>JobCredentialPrivateKey</B> |
| <P> |
| <input type="text" name="public_key"> <B>JobCredentialPublicCertificate</B> |
| <P> |
| |
| <H2>State Preservation</H2> |
| Define the location of a directory where the slurmctld daemon saves its state. |
| This should be a fully qualified pathname which can be read and written to |
| by the SLURM user on both the control machine and backup controller (if configured). |
| The location of a directory where slurmd saves state should also be defined. |
| This must be a unique directory on each compute server (local disk). |
| The use of a highly reliable file system (e.g. RAID) is recommended. |
| <P> |
| <input type="text" name="state_save_location" value="/tmp"> <B>StateSaveLocation</B>: |
| Slurmctld state save directory <B>Must be writable by both ControlMachine and BackupController</B> |
| <P> |
| <input type="text" name="slurmd_spool_dir" value="/tmp/slurmd"> <B>SlurmdSpoolDir</B>: |
| Slurmd state save directory |
| <P> |
| Define when a non-responding (DOWN) node is returned to service.<BR> |
| Select one value for <B>ReturnToService</B>:<BR> |
| <input type="radio" name="return_to_service" value="0"> |
| <B>0</B>: When explicitly restored to service by an administrator.<BR> |
| <input type="radio" name="return_to_service" value="1" checked> |
| <B>1</B>: Automatically, when slurmd daemon registers with valid configuration<BR> |
| <P> |
| |
| <H2>Scheduling</H2> |
| Define the mechanism to be used for controlling job ordering.<BR> |
| Select one value for <B>SchedulerType</B>:<BR> |
| <input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In |
| First-Out (FIFO)<BR> |
| <input type="radio" name="sched_type" value="backfill" checked> <B>Backfill</B>: |
| FIFO with backfill<BR> |
| <input type="radio" name="sched_type" value="gang"> <B>Gang</B>: Gang scheduling |
| (time-slicing for parallel jobs)<BR> |
| <input type="radio" name="sched_type" value="wiki"> <B>Wiki</B>: Wiki interface |
| to Maui (configuration parameter <B>SchedulerPort</B> must be specified)<BR> |
| <input type="radio" name="sched_type" value="wiki2"> <B>Wiki2</B>: Wiki interface |
| to Moab (configuration parameter <B>SchedulerPort</B> must be specified)<BR> |
| <P> |
| <input type="text" name="scheduler_port" value="7321"> <B>SchedulerPort</B>: scheduler |
| communications port (used by Wiki only) |
| <P> |
| Define what node configuration (sockets, cores, memory, etc.) should be used. |
| Using values defined in the configuration file will provide faster scheduling.<BR> |
| Select one value for <B>FastSchedule</B>:<BR> |
| <input type="radio" name="fast_schedule" value="1" checked> |
| <B>1</B>: Use node configuration values defined in configuration file<BR> |
| <input type="radio" name="fast_schedule" value="0"> |
| <B>0</B>: Use node configuration values actually found on each node |
| (if configured with gang scheduling or allocation of individual |
| processors to jobs rather than only whole node allocations, the processor |
| count on the node should match the configured value to avoid having extra |
| processors left idle) |
| <P> |
| |
| <H2>Interconnect</H2> |
| Define the node interconnect used.<BR> |
| Select one value for <B>SwitchType</B>:<BR> |
| <input type="radio" name="switch_type" value="elan"> <B>Elan</B>: Quadrics Elan3 or Elan4<BR> |
| <input type="radio" name="switch_type" value="federation"> <B>Federation</B>: IBM |
| Federation Switch<BR> |
| <input type="radio" name="switch_type" value="none" checked> <B>None</B>: No special |
| handling required (InfiniBand, Myrinet, Ethernet, etc.)<BR> |
| <P> |
| |
| <H2>Default MPI Type</H2> |
| Specify the type of MPI to be used by default. SLURM will configure environment |
| variables accordingly. Users can over-ride this specification with an srun option.<BR> |
| Select one value for <B>MpiDefault</B>:<BR> |
| <input type="radio" name="mpi_default" value="mpichgm"> <B>MPICH-GM</B><BR> |
| <input type="radio" name="mpi_default" value="mpichmx"> <B>MPICH-MX</B><BR> |
| <input type="radio" name="mpi_default" value="mpich1_p4"> <B>MPICH1-P4</B><BR> |
| <input type="radio" name="mpi_default" value="mpich1_shmem"> <B>MPICH1-SHMEM</B>: |
| This also works for MVAPICH-SHMEM.<BR> |
| <input type="radio" name="mpi_default" value="mvapich"> <B>MVAPICH</B><BR> |
| <input type="radio" name="mpi_default" value="none" checked> <B>None</B>: |
| This works for most other MPI types including MPICH2, LAM MPI and Open MPI.<BR> |
| <P> |
| |
| <H2>Process Tracking</H2> |
| Define the algorithm used to identify which processes are associated with a |
| given job. This is used to signal, kill, and account for the processes associated |
| with a job step.<BR> |
| Select one value for <B>ProctrackType</B>:<BR> |
| <input type="radio" name="proctrack_type" value="aix"> <B>AIX</B>: Use AIX kernel |
| extension, recommended for AIX systems<BR> |
| <input type="radio" name="proctrack_type" value="pgid" checked> <B>Pgid</B>: Use Unix |
| Process Group ID, processes changing their process group ID can escape from SLURM |
| control<BR> |
| <input type="radio" name="proctrack_type" value="linuxproc"> <B>LinuxProc</B>: Use |
| parent process ID records, required for MPICH-GM use, processes can escape |
| from SLURM control<BR> |
| <input type="radio" name="proctrack_type" value="rms"> <B>RMS</B>: Use Quadrics |
| kernel infrastructure, recommended for systems where this is available<BR> |
| <input type="radio" name="proctrack_type" value="sgi_job"> <B>SGI's PAGG |
| module</B>: Use <A HREF="http://oss.sgi.com/projects/pagg/">SGI's Process |
| Aggregates (PAGG) kernel module</A>, recommended where available<BR> |
| <P> |
| |
| <H2>Resource Selection</H2> |
| Define resource (node) selection algorithm to be used.<BR> |
| Select one value for <B>SelectType</B>:<BR> |
| <input type="radio" name="select_type" value="cons_res"> |
| <B>Cons_res</B>: Allocate individual processors and memory<BR> |
| <DL> |
| <DL> |
| <DT><B>SelectTypeParameters</B> (As used by <I>SelectType=Cons_res</I> only): |
| <DD> Note: The -E extension for sockets, cores, and threads |
| are ignored within the node allocation mechanism |
| when CR_CPU or CR_CPU_MEMORY is selected. |
| They are considered to compute the total number of |
| tasks when -n is not specified |
| <DD> Note: CR_MEMORY assumes MaxShare value of one or higher |
| <DT> <input type="radio" name="cons_res_params" value="CR_CPU" checked |
| onClick="javascript:set_select_type(this, 'cons_res')"> |
| <B>CR_CPU</B>: (default) |
| CPUs as consumable resources. |
| <DD> No notion of sockets, cores, or threads. |
| On a multi-core system, cores will be considered CPUs. |
| On a multi-core/hyperthread system, threads will be considered CPUs. |
| On single-core systems, CPUs are CPUs. ;-) |
| <DT> <input type="radio" name="cons_res_params" value="CR_Socket" |
| onClick="javascript:set_select_type(this)"> |
| <B>CR_Socket</B>: Sockets as a consumable resource. |
| <DT> <input type="radio" name="cons_res_params" value="CR_Core" |
| onClick="javascript:set_select_type(this)"> |
| <B>CR_Core</B>: Cores as a consumable resource. |
| <DT> <input type="radio" name="cons_res_params" value="CR_Memory" |
| onClick="javascript:set_select_type(this)"> |
| <B>CR_Memory</B>: Memory as a consumable resource. |
| <DD> Note: CR_Memory assumes MaxShare value of one or higher |
| <DT> <input type="radio" name="cons_res_params" value="CR_CPU_Memory" |
| onClick="javascript:set_select_type(this)"> |
| <B>CR_CPU_Memory</B>: |
| CPU and Memory as consumable resources. |
| <DT> <input type="radio" name="cons_res_params" value="CR_Socket_Memory" |
| onClick="javascript:set_select_type(this)"> |
| <B>CR_Socket_Memory</B>: |
| Socket and Memory as consumable resources. |
| <DT> <input type="radio" name="cons_res_params" value="CR_Core_Memory" |
| onClick="javascript:set_select_type(this)"> |
| <B>CR_Core_Memory</B>: |
| Core and Memory as consumable resources. |
| </DL> |
| </DL> |
| <input type="radio" name="select_type" value="linear" checked> |
| <B>Linear</B>: Node-based |
| resource allocation, does not manage individual processor allocation<BR> |
| <input type="radio" name="select_type" value="bluegene"> |
| <B>BlueGene</B>: For IBM Blue Gene systems only<BR> |
| <P> |
| |
| <H2>Task Launch</H2> |
| Define a task launch plugin. This may be used to |
| provide resource management within a node (e.g. pinning |
| tasks to specific processors). |
| Select one value for <B>TaskPlugin</B>:<BR> |
| <input type="radio" name="task_plugin" value="none" checked> <B>None</B>: No task launch actions<BR> |
| <input type="radio" name="task_plugin" value="affinity"> <B>Affinity</B>: |
| CPU affinity support |
| (see srun man pages for the --cpu_bind, --mem_bind, and -E options) |
| <DL><DL> |
| <DT><B>TaskPluginParam</B> (As used by <I>TaskPlugin=Affinity</I> only): |
| <DT><input type="radio" name="task_plugin_param" value="Cpusets"> |
| <B>Cpusets</B>: Use <I>cpusets</I> to control task binding. |
| <DT><input type="radio" name="task_plugin_param" value="Sched" checked> |
| <B>Sched</B>: Use <I>sched_setaffinity</I> or <I>plpa_sched_setaffinity</I> |
| (if available) to bind tasks to processors. This is the default mode of |
| operation. |
| </DL></DL> |
| <P> |
| |
| <H2>Prolog and Epilog</H2> |
| <P> |
| <B>Prolog/Epilog</B>: Fully qualified path that will be executed as |
| root on every node of a user's job before the job's tasks |
| will be initiated there and after that job has terminated. |
| These parameters are optional. |
| <DL> |
| <DT> <input type="text" name="prolog" value="" size=40> <B>Prolog</B> |
| <DT> <input type="text" name="epilog" value="" size=40> <B>Epilog</B> |
| </DL> |
| |
| <P> |
| <B>SrunProlog/Epilog</B>: Fully qualified path to be executed by srun at |
| job step initiation and termination. These parameters may be overridden by |
| srun's --prolog and --epilog options. |
| These parameters are optional. |
| <DL> |
| <DT> <input type="text" name="srun_prolog" value="" size=40> <B>SrunProlog</B> |
| <DT> <input type="text" name="srun_epilog" value="" size=40> <B>SrunEpilog</B> |
| </DL> |
| |
| <P> |
| <B>TaskProlog/Epilog</B>: Fully qualified path to be executed as the user |
| before each task begins execution and after each task terminates. |
| These parameters are optional. |
| <DL> |
| <DT> <input type="text" name="task_prolog" value="" size=40> <B>TaskProlog</B> |
| <DT> <input type="text" name="task_epilog" value="" size=40> <B>TaskEpilog</B> |
| </DL> |
| |
| <H2>Event Logging</H2> |
| Slurmctld and slurmd daemons can each be configured with different |
| levels of logging verbosity from 0 (quiet) to 7 (extremely verbose). |
| Each may also be configured to use debug files. Use fully qualified |
| pathnames for the files. |
| <P> |
| <input type="text" name="slurmctld_debug" value="3"> <B>SlurmctldDebug</B> (0 to 7) |
| <P> |
| <input type="text" name="slurmctld_logfile" value=""> <B>SlurmctldLogFile</B> (default is none, log goes to syslog) |
| <P> |
| <input type="text" name="slurmd_debug" value="3"> <B>SlurmdDebug</B> (0 to 7) |
| <P> |
| <input type="text" name="slurmd_logfile" value=""> <B>SlurmdLogFile</B> (default is none, |
| log goes to syslog, string "%h" in name gets replaced with hostname) |
| <P> |
| |
| <H2>Job Completion Logging</H2> |
| Define the job completion logging mechanism to be used.<BR> |
| Select one value for <B>JobCompType</B>:<BR> |
| <input type="radio" name="job_comp_type" value="none" checked> <B>None</B>: |
| No job completion logging<BR> |
| <input type="radio" name="job_comp_type" value="filetxt"> <B>FileTxt</B>: |
| Write job completion status to a text file<BR> |
| <input type="radio" name="job_comp_type" value="script"> <B>Script</B>: |
| Use an arbitrary script to log job completion<BR> |
| <input type="radio" name="job_comp_type" value="mysql"> <B>MySQL</B>: |
| Write completion status to a MySQL database<BR> |
| <input type="radio" name="job_comp_type" value="pgsql"> <B>PGSQL</B>: |
| Write completion status to a PostgreSQL database<BR> |
| <input type="radio" name="job_comp_type" value="slurmdbd"> <B>SlurmDBD</B>: |
| Write completion status to a Slurm database daemon (serving multiple Slurm clusters) |
| which will write to some database<BR> |
| <P> |
| <input type="text" name="job_comp_loc" value=""> <B>JobCompLoc</B>: |
| This is the location of the text file to be written to (if JobCompType=filetxt) |
| or the script to be run (if JobCompType=script) or database name (for other values |
| of JobCompType). |
| <p><b>Options below are for use with a database to specify where the database is running and how to connect to it</b><br> |
| <input type="text" name="job_comp_host" value=""> <B>JobCompHost</B>: |
| Host the database is running on for Job completion<br> |
| <input type="text" name="job_comp_port" value=""> <B>JobCompPort</B>: |
| Port the database server is listening on for Job completion<br> |
| <input type="text" name="job_comp_user" value=""> <B>JobCompUser</B>: |
| User we are to use to talk to the database for Job completion<br> |
| <input type="text" name="job_comp_pass" value=""> <B>JobCompPass</B>: |
| Password we are to use to talk to the database for Job completion<br> |
| <P> |
| |
| <H2>Job Accounting Gather</H2> |
| SLURM accounts for resource use per job. System specifics can be polled, |
| as determined by system type<BR> |
| Select one value for <B>JobAcctGatherType</B>:<BR> |
| <input type="radio" name="job_acct_gather_type" value="none" checked> <B>None</B>: No |
| job accounting<BR> |
| <input type="radio" name="job_acct_gather_type" value="aix"> <B>AIX</B>: Specific |
| AIX process table information gathered, use with AIX systems only<BR> |
| <input type="radio" name="job_acct_gather_type" value="linux"> <B>Linux</B>: Specific |
| Linux process table information gathered, use with Linux systems only<BR> |
| <input type="text" name="job_acct_gather_frequency" value="30"> <B>JobAcctGatherFrequency</B>: |
| polling interval in seconds. Zero disables periodic sampling.<BR> |
| <P> |
| |
| <H2>Job Accounting Storage</H2> |
| Used with Job Accounting Gather, SLURM can store the accounting information in many different fashions. Fill in your system's choice here.<BR> |
| Select one value for <B>AccountingStorageType</B>:<BR> |
| <input type="radio" name="accounting_storage_type" value="none" checked> <B>None</B>: |
| No job accounting storage<BR> |
| <input type="radio" name="accounting_storage_type" value="filetxt"> <B>FileTxt</B>: |
| Write job accounting to a text file<BR> |
| <input type="radio" name="accounting_storage_type" value="gold"> <B>Gold</B>: |
| Write completion status to Gold database daemon which can securely |
| save the data from many Slurm managed clusters into a common database<BR> |
| <input type="radio" name="accounting_storage_type" value="mysql"> <B>MySQL</B>: |
| Write job accounting to a MySQL database<BR> |
| <input type="radio" name="accounting_storage_type" value="pgsql"> <B>PGSQL</B>: |
| Write job accounting to a PostgreSQL database<BR> |
| <input type="radio" name="accounting_storage_type" value="slurmdbd"> <B>SlurmDBD</B>: |
| Write job accounting to Slurm DBD (database daemon) which can securely |
| save the data from many Slurm managed clusters into a common database<BR> |
| <input type="text" name="accounting_storage_loc" value=""> <B>AccountingStorageLoc</B>: |
| Location specification or database name. |
| This is the location of the text file to be written to (used by FileTxt only). |
| Use a fully qualified pathname. If using a database it is the name of the database you will use or create for the stored data.<br> |
| <p><b>Options below are for use with a database to specify where the database is running and how to connect to it</b><br> |
| <input type="text" name="accounting_storage_host" value=""> <B>AccountingStorageHost</B>: |
| Host the database is running on for Job Accounting<br> |
| <input type="text" name="accounting_storage_port" value=""> <B>AccountingStoragePort</B>: |
| Port the database server is listening on for Job Accounting<br> |
| <input type="text" name="accounting_storage_user" value=""> <B>AccountingStorageUser</B>: |
| User we are to use to talk to the database for Job Accounting<br> |
| <input type="text" name="accounting_storage_pass" value=""> <B>AccountingStoragePass</B>: |
| Password we are to use to talk to the database for Job Accounting. |
| In the case of SlurmDBD, this will be an alternate socket name for use with a Munge |
| daemon providing enterprise-wide authentication (while the default Munge socket |
| would provide cluster-wide authentication only).<br> |
| <input type="text" name="cluster_name" value="cluster"> <B>ClusterName</B>: |
| Name to be recorded in database for jobs from this cluster. |
| This is important if a single database is used to record information |
| from multiple Slurm-managed clusters.<br> |
| |
| <P> |
| |
| <H2>Process ID Logging</H2> |
| Define the location into which we can record the daemon's process ID. |
| This is used to locate the appropriate daemon for signalling. |
| Specify the fully qualified pathname for the file. |
| <P> |
| <input type="text" name="slurmctld_pid_file" value="/var/run/slurmctld.pid"> |
| <B>SlurmctldPidFile</B> |
| <P> |
| <input type="text" name="slurmd_pid_file" value="/var/run/slurmd.pid"> |
| <B>SlurmdPidFile</B> |
| <P> |
| |
| <H2>Timers</H2> |
| SLURM has a variety of timers to control when to consider a node DOWN, |
| when to purge job records, how long to give a job to gracefully terminate, etc. |
| <P> |
| <input type="text" name="slurmctld_timeout" value="300"> |
| <B>SlurmctldTimeout</B>: How many seconds the backup controller waits before |
| becoming the master controller |
| <P> |
| <input type="text" name="slurmd_timeout" value="300"> |
| <B>SlurmdTimeout</B>: How many seconds the SLURM controller waits for the slurmd |
| to respond to a request before considering the node DOWN |
| <P> |
| <input type="text" name="inactive_limit" value="0"> |
| <B>InactiveLimit</B>: How many seconds the SLURM controller waits for srun |
| commands to respond before considering the job or job step inactive and |
| terminating it. A value of zero indicates unlimited wait |
| <P> |
| <input type="text" name="min_job_age" value="300"> |
| <B>MinJobAge</B>: How many seconds the SLURM controller waits after a |
| job terminates before purging its record. A record of the job will |
| persist in job completion and/or accounting records indefinitely, |
| but will no longer be visible with the squeue command after purging |
| <P> |
| <input type="text" name="kill_wait" value="30"> |
| <B>KillWait</B>: How many seconds a job is given to gracefully terminate |
| after reaching its time limit and being sent SIGTERM before sending |
| a SIGKILL |
| <P> |
| <input type="text" name="wait_time" value="0"> |
| <B>WaitTime</B>: How many seconds after a job step's first task terminates |
| before terminating all remaining tasks. A value of zero indicates unlimited wait |
| <P> |
| |
| <BR> |
| <BR> |
| <input type=button value="Submit" onClick="javascript:displayfile()"> |
| <input type=reset value="Reset Form"> |
| <P> |
| </FORM> |
| <HR> |
| <P class="footer">LLNL-WEB-402631<BR> |
| Last modified 13 May 2009</P> |
| </BODY> |
| |