% doc/slides/job-initiation/slides.tex - SchedMD/slurm - Git at Google

 %
 % $Id$
 %

 % slides documenting job startup design
 % not too detailed, but some basic info...

 \documentclass[article,letter,landscape]{seminar}


 \usepackage{pstcol}
 \usepackage{slidesec}
 \usepackage{semcolor}

 \input{seminar.bug}
 \input{seminar.bg2}

 \usepackage{graphics}
 \usepackage{fancybox}
 \usepackage{fancyhdr}
 \usepackage{epsfig}
 \usepackage{epsf}

 \pagestyle{empty}
 \twoup[1]

 % Shorthand macros that typeset SLURM program names in typewriter type.
 % Each body is wrapped in its own brace group, so the \tt font switch
 % does not leak into the surrounding text.  The slides write them as
 % `\srun\ ' (with a control space) to keep the following inter-word space.
 \newcommand{\slurmd}{{\tt slurmd}}
 \newcommand{\slurmctld}{{\tt slurmctld}}
 \newcommand{\srun}{{\tt srun}}
 % \sq prints "squeue"; no slide visible in this file uses it.
 \newcommand{\sq}{{\tt squeue}}

 % \entrylabel{<term>}: formats a list label as "<term>:" in typewriter
 % type inside an unbreakable box; the trailing \hfil pushes it to the left
 % edge of the label area.
 \newcommand{\entrylabel}[1]{\mbox{{\tt #1:}}\hfil}
 % entry: a description-style list whose labels are drawn by \entrylabel.
 % The label column is fixed at 50pt and the left margin is set to
 % \labelwidth + \labelsep so item text starts right after the label column.
 \newenvironment{entry}
 {\begin{list}{}%
 	{\renewcommand{\makelabel}{\entrylabel}%
 		\setlength{\labelwidth}{50pt}%
 			\setlength{\leftmargin}{\labelwidth}%
 			\addtolength{\leftmargin}{\labelsep}%
 	}%
 }%
 {\end{list}}

 % \Lentrylabel relies on \ifthenelse and \lengthtest, which are provided by
 % the `ifthen' package.  Load it here so that the Lentry environment works
 % when it is actually used (loading it twice is harmless).
 \usepackage{ifthen}

 % Scratch length: natural width of the label currently being typeset.
 \newlength{\Mylen}

 % \Lentrylabel{<term>}: label formatter for lists with long terms.
 % Typesets "<term>:" in typewriter type.  If the label is wider than
 % \labelwidth it is placed in a zero-width box on a line of its own, so the
 % item text starts on the next line; otherwise it is set inline.
 \newcommand{\Lentrylabel}[1]{%
   \settowidth{\Mylen}{{\tt #1:}}%
   \ifthenelse{\lengthtest{\Mylen > \labelwidth}}%
     {\parbox[b]{\labelwidth}%             term > labelwidth
       {\makebox[0pt][l]{{\tt #1:}}\\}}%
     {{\tt #1:}}%                          term <= labelwidth
   \hfil\relax}

 % Lentry: same layout as the `entry' environment, but with \entrylabel
 % locally redirected to \Lentrylabel so over-long labels wrap as above.
 \newenvironment{Lentry}
   {\renewcommand{\entrylabel}{\Lentrylabel}%
    \begin{entry}}
   {\end{entry}}


 % Headers and footers personalization using the `fancyhdr' package
 \fancyhf{} % Clear all fields
 % Thickness of the rule under the header and above the footer
 \renewcommand{\headrulewidth}{0.2mm}
 \renewcommand{\footrulewidth}{0.2mm}
 % Centered header: title of the slide deck
 \fancyhead[C]{\Large\textbf{SLURM Design - Job Initiation}}
 % Left and right footers: a scaled logo image with a tiny caption below it
 % (penguin.eps and llnl.ps must be available when the slides are built)
 \fancyfoot[L]{\includegraphics[scale=0.075]{penguin.eps}\\\tiny LINUX}
 \fancyfoot[R]{\includegraphics[scale=0.2]{llnl.ps}\\\tiny LLNL}
 % Centered footer: current slide number
 \fancyfoot[C]{\tiny Page \theslide}


 % Create room for headers and footers
 \renewcommand{\slidetopmargin}{2cm}
 \renewcommand{\slidebottommargin}{3cm}

 % Center horizontally the headers and footers (see seminar.bug)
 \renewcommand{\headwidth}{\textwidth}

 % To adjust the frame length to the header and footer ones
 \autoslidemarginstrue

 % Hook to tell dvips to print in landscape mode
 % (only defined here; nothing in the visible source invokes \printlandscape)
 \def\printlandscape{\special{landscape}}

 % Possibilities: shadow, oval, double, none, plain
 % `none' is selected, i.e. slides are drawn without a frame
 \slideframe{none}

 % Named RGB colours; none of them is referenced by the slides visible
 % in this file.
 \definecolor{Blue}{rgb}{0.,0.,1.}
 \definecolor{Gold}{rgb}{1.,0.84,0.}
 \definecolor{Pink}{rgb}{1.,0.75,0.8}
 \begin{document}

 \slidepagestyle{fancy}

 % The presentation begins here.

 \begin{slide}
   \slideheading{Design Constraints}
   \mbox{}\\
   \begin{center}
   Overall Design Goals\\
   \begin{itemize}
      \item Scalable
      \item Fault Tolerant
      \item Simplified
      \item Administrator Friendly
      \item Secure
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Design Constraints}
   \mbox{}\\
   \begin{center}
   Job Initiation Design Goals\\
   \begin{itemize}
     \item Run jobs in several different ``modes'':
     \begin{itemize}
       \item interactive
       \item batch
       \item allocate only (then run under allocation)
     \end{itemize}
     \item Support multiple runs under a single allocation
     \item Ability to attach and detach from running jobs
     \begin{itemize}
       \item for job control and fault tolerance
     \end{itemize}
     \item Specification of partial or full nodelists
     \item Support for prolog/epilog
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Security}
   \begin{center}
   \begin{itemize}
   \item Link Encryption
   \begin{itemize}
     \item mongo encrypts all communication with a cluster-wide
           shared key.
     \item access to shared key will be group-limited.
   \end{itemize}
   \item Job Credential
   \begin{itemize}
     \item \slurmctld\ will have private keys for every \slurmd\
           and will hash Job Credential against this private key.
     \item \slurmd 's will verify \srun\ connections using these
           job credentials.
   \end{itemize}
   \item \slurmd\ and \slurmctld\ will utilize PAM for configurable authentication
         over the encrypted link.
   \begin{itemize}
     \item \slurmd\ will fall through to PAM if credential is null.
   \end{itemize}
   \end{itemize}
   \end{center}
 \end{slide}


 \begin{slide}
   \slideheading{\slurmd\ Initialization}
   \begin{center}
   \begin{itemize}
   \item On startup, \slurmd\ will generate a private key  and
         attempt to contact slurm controller to join the cluster.
   \item If slurm controller is not up at time of \slurmd\ initialization,
         \slurmd\ will sleep and listen on a well-defined mongo-port for status
 	requests from controller, or other entity.
   \item On first contact with controller, \slurmd\ will exchange secret key
         by setting flag {\tt REFRESH\_KEY} in the corresponding message.
   \item After private key has been exchanged, \slurmd\ will wait for
         further work
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{ Interactive Job Initiation }
   \begin{center}
   \begin{itemize}
   \item For an interactive job, \srun\ will build an allocate request
         to send to the controller, based upon user options.
   \item the {\tt MSG\_ALLOCATE\_REQ} message will contain the
         following information:
   \begin{itemize}
     \item immediate or block (flag)
     \item partition
     \item user info (username and/or uid)
     \item nprocs, nnodes, ncpus\_per\_task
     \item task distribution flag (block$|$cyclic)
   \end{itemize}
     optional:
   \begin{itemize}
     \item requested time limit
     \item constraint and requested feature list
   \end{itemize}
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Interactive Job Initiation, cont'd ...}
   \begin{center}
   \begin{itemize}
   \item \srun\ initializes mongo with shared mongo-key and
         sends allocate request to controller.
   \item controller replies with an allocate reply message:
   \item message type MSG\_ALLOCATE\_REPLY contains:
   \begin{itemize}
     \item return and error code
     \item node list
     \item credential list (1 per node)
     \item cpus per node
   \end{itemize}
   \item if return code indicates an error, \srun\ will print
         text representation of error code on stderr and exit
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Interactive Job Initiation, cont'd ...}
   \begin{center}
   \begin{itemize}
     \item If stdout/err is to be copied to user's terminal, \srun\ initializes
           two mongo-ports for every task and begins listening on these ports
     \item \srun\ similarly initializes mongo-ports for stdin.
     \item \srun\ formulates the list of nodes, credentials, and task
           assignments into a MSG\_JOB\_RUN\_REQ message.
  \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Interactive Job Initiation: job\_run\_request}
   \begin{center}
   \begin{itemize}
    \item MSG\_JOB\_RUN\_REQ contents:
      \begin{itemize}
       \item job credential
       \item jobid
       \item command line
       \item user info (username and/or uid)
       \item ntasks (number of tasks to initiate on this node)
       \item stdin, stdout, stderr locations (mongo-ports or files)
             (per task)
     \end{itemize}
     Optional contents (prefixed by id):
     \begin{itemize}
       \item cwd
       \item environment
       \item stop time
       \item signal handling
     \end{itemize}
     \end{itemize}
   \end{center}
 \end{slide}


 \begin{slide}
   \slideheading{Interactive Job Initiation: cont'd ... }
   \begin{center}
   \begin{itemize}
     \item One job run request message is built per node - \srun\
           sends these messages to all nodes using a {\tt mongo\_sendto:}\\
 	  \begin{verbatim} md = mongo_sendto <host>:<mongo_port>, &addr
 	  \end{verbatim}
     \item \srun\ then waits for reply messages from all \slurmd 's
           with {\tt mongo\_recvfrom} or {\tt mongo\_select}
     \item After a timeout, \srun\ sends status update to \slurmctld\
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Interactive Job Initiation: \slurmd}
   \begin{center}
   \begin{itemize}
     \item \slurmd\ receives a message from \srun\ and spawns a
           request thread ({\tt req\_thr})
     \item {\tt req\_thr} reads message header, sees that the message is
           of type MSG\_JOB\_RUN\_REQ and executes the {\tt job\_thr()}
 	  function.
     \item {\tt job\_thr} unpacks message body, then:
     \begin{itemize}
       \item if credential is non-null, decrypt credential to authorize job
       \item null credential: fall through to PAM
       \item if job and user are authorized, instantiate a task thread for
             each task requested and send MSG\_JOB\_RUN\_REPLY.
     \end{itemize}
  \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Interactive Job Initiation: \slurmd }
   \begin{center}
   \begin{itemize}
     \item {\tt task\_thr} examines std\{in,err,out\} locations for
           this task and opens appropriately
     \begin{itemize}
       \item {\tt /dev/null}
       \item host:mongo port
       \item filename
     \end{itemize}
     \item {\tt task\_thr} dups stdout/err/in fds as appropriate.
     \item Each {\tt task\_thr } then does fork(), setuid(), chdir()
           (if req'd), setup environment, and exec() of the user's executable.
     \item {\tt task\_thr} will then wait() for its child.
     \item {\tt job\_thr} continues to listen for job control information
           over original connection (signals, cancel, etc.)
     \item {\tt job\_thr} on node 0 of job will monitor connection to
           \srun\ and will initiate a new \srun\ on local node if
 	  necessary.
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Interactive Job Initiation, cont'd ...}
   \begin{center}
   \begin{itemize}
     \item \srun\ remains attached to user's terminal, copying stderr
           and stdout (optionally prepending task no) and forwarding signals
     \item \srun\ transparently forwards signals generated on the
           user's terminal to remote tasks (MSG\_SIGNAL)
     \item \srun\ also receives status information on original channel:
     \begin{itemize}
       \item MSG\_TASK\_EXITED
     \end{itemize}
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Batch Job Initiation}
   \begin{center}
   \begin{itemize}
     \item a batch submission consists of a script that will be invoked
           on only one node in the allocation
     \item \srun\ sends MSG\_BATCH\_SUBMIT\_REQ to \slurmctld , which contains
     \begin{itemize}
       \item user info
       \item nnodes, nprocs, cpus\_per\_task
       \item script path, environment, cwd
     \end{itemize}
     Optional parameters:
     \begin{itemize}
       \item requested timelimit (default set by config)
       \item filenames for stdout/err (default jobid.o,e)
       \item stdin location (default {\tt /dev/null})
       \item signal handling
     \end{itemize}
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Batch Job Initiation, cont'd ...}
   \begin{center}
   \begin{itemize}
     \item \slurmctld\ sends MSG\_JOB\_RUN\_REQ to first node in job
           when it has allocated nodes
     \item Job run request contents:
     \begin{itemize}
       \item credential, jobid, userinfo
       \item stdin/out/err locations (always files)
       \item ntasks $=1$
       \item command line $=$ ``{\tt /path/to/script args}''
       \item stop time
     \end{itemize}
   \item \slurmd\ handles job run request as previously explained
   \item \slurmctld\ will keep connection open for this job, waiting
         for task exit message, at which point job is complete.
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Batch Job Initiation, cont'd ...}
   \begin{center}
   \begin{itemize}
     \item Presence of SLURM\_JOBID env var indicates to \srun\ that it
           is running under an allocation (has access to resources).
     \item SLURM\_HOSTLIST will also be set to the list of nodes allocated
     \item \srun\ must query \slurmctld , however, for two reasons:
     \begin{itemize}
       \item \srun\ doesn't know number of cpus for each node
       \item safer for \srun\ to obtain switch resources from controller
     \end{itemize}
     \item Therefore, \srun\ will first query \slurmctld\ for details of
           its allocation
     \item \srun\ will then process command line options relative to this
           allocation. If a subset of nodes is specified, \srun\ will need
 	  to requery the controller for switch resources. (We may be
 	  able to get away without doing this 2\textsuperscript{nd} query)
   \end{itemize}
   \end{center}
 \end{slide}

 \begin{slide}
   \slideheading{Allocate Mode}
   \begin{center}
   \begin{itemize}
   \item Allocates a set of resources to the
         user then spawns a shell with access to those resources.
 	(SLURM\_JOBID and SLURM\_HOSTLIST are set)
   \item initiating \srun\ will need to {\tt wait()} for exit
         of the shell to notify \slurmctld\ that job is complete.
   \item subsequent invocations of \srun\ behave as in the batch run
         case
   \end{itemize}
   \end{center}
 \end{slide}

 %\begin{slide}
 %  \slideheading{Attaching to a Job}
 %  \begin{center}
 %  \begin{itemize}
 %    \item User specifies job name or jobid on command line
 %    \item \srun\ connects to \slurmctld\ and sends a MSG\_JOB\_ATTACH\_REQ:
 %    \begin{itemize}
 %      \item job id
 %      \item job name
 %    \end{itemize}
 %    \item MSG\_JOB\_ATTACH\_REPLY contains similar information as
 %          MSG\_ALLOCATE\_REPLY
 %    \item \srun\ now has credentials to contact \slurmds\ in job
 %  \end{itemize}
 %  \end{center}
 %\end{slide}

 %\begin{slide}
 %  \slideheading{}
 %  \begin{center}
 %  \begin{itemize}
 %  \end{itemize}
 %  \end{center}
 %\end{slide}

 \end{document}
	%
	% $Id$
	%

	% slides documenting job startup design
	% not too detailed, but some basic info...

	\documentclass[article,letter,landscape]{seminar}


	\usepackage{pstcol}
	\usepackage{slidesec}
	\usepackage{semcolor}

	\input{seminar.bug}
	\input{seminar.bg2}

	\usepackage{graphics}
	\usepackage{fancybox}
	\usepackage{fancyhdr}
	\usepackage{epsfig}
	\usepackage{epsf}

	\pagestyle{empty}
	\twoup[1]

	\newcommand{\slurmd}{{\tt slurmd}}
	\newcommand{\slurmctld}{{\tt slurmctld}}
	\newcommand{\srun}{{\tt srun}}
	\newcommand{\sq}{{\tt squeue}}

	\newcommand{\entrylabel}[1]{\mbox{{\tt #1:}}\hfil}
	\newenvironment{entry}
	{\begin{list}{}%
	{\renewcommand{\makelabel}{\entrylabel}%
	\setlength{\labelwidth}{50pt}%
	\setlength{\leftmargin}{\labelwidth}%
	\addtolength{\leftmargin}{\labelsep}%
	}%
	}%
	{\end{list}}

	\newlength{\Mylen}
	\newcommand{\Lentrylabel}[1]{%
	\settowidth{\Mylen}{{\tt #1:}}%
	\ifthenelse{\lengthtest{\Mylen > \labelwidth}}%
	{\parbox[b]{\labelwidth}% term > labelwidth
	{\makebox[0pt][l]{{\tt #1:}}\\}}%
	{{\tt #1:}}% term < labelwidth
	\hfil\relax}
	\newenvironment{Lentry}
	{\renewcommand{\entrylabel}{\Lentrylabel}%
	\begin{entry}}
	{\end{entry}}



	% Headers and footers personalization using the `fancyhdr' package
	\fancyhf{} % Clear all fields
	\renewcommand{\headrulewidth}{0.2mm}
	\renewcommand{\footrulewidth}{0.2mm}
	\fancyhead[C]{\Large\textbf{SLURM Design - Job Initiation}}
	\fancyfoot[L]{\includegraphics[scale=0.075]{penguin.eps}\\\tiny LINUX}
	\fancyfoot[R]{\includegraphics[scale=0.2]{llnl.ps}\\\tiny LLNL}
	\fancyfoot[C]{\tiny Page \theslide}


	% Create room for headers and footers
	\renewcommand{\slidetopmargin}{2cm}
	\renewcommand{\slidebottommargin}{3cm}

	% Center horizontally the headers and footers (see seminar.bug)
	\renewcommand{\headwidth}{\textwidth}

	% To adjust the frame length to the header and footer ones
	\autoslidemarginstrue

	% Hook to tell dvips to print in landscape mode
	\def\printlandscape{\special{landscape}}

	% Possibilities: shadow, oval, double, none, plain
	\slideframe{none}

	\definecolor{Blue}{rgb}{0.,0.,1.}
	\definecolor{Gold}{rgb}{1.,0.84,0.}
	\definecolor{Pink}{rgb}{1.,0.75,0.8}
	\begin{document}

	\slidepagestyle{fancy}

	% The presentation begins here.

	\begin{slide}
	\slideheading{Design Constraints}
	\mbox{}\\
	\begin{center}
	Overall Design Goals\\
	\begin{itemize}
	\item Scalable
	\item Fault Tolerant
	\item Simplified
	\item Administrator Friendly
	\item Secure
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Design Constraints}
	\mbox{}\\
	\begin{center}
	Job Initiation Design Goals\\
	\begin{itemize}
	\item Run jobs in several different ``modes'':
	\begin{itemize}
	\item interactive
	\item batch
	\item allocate only (then run under allocation)
	\end{itemize}
	\item Support multiple runs under a single allocation
	\item Ability to attach and detach from running jobs
	\begin{itemize}
	\item for job control and fault tolerance
	\end{itemize}
	\item Specification of partial or full nodelists
	\item Support for prolog/epilog
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Security}
	\begin{center}
	\begin{itemize}
	\item Link Encryption
	\begin{itemize}
	\item mongo encrypts all communication with a cluster-wide
	shared key.
	\item access to shared key will be group-limited.
	\end{itemize}
	\item Job Credential
	\begin{itemize}
	\item \slurmctld\ will have private keys for every \slurmd\
	and will hash Job Credential against this private key.
	\item \slurmd 's will verify \srun\ connections using these
	job credentials.
	\end{itemize}
	\item \slurmd\ and \slurmctld\ will utilize PAM for configurable authentication
	over the encrypted link.
	\begin{itemize}
	\item \slurmd\ will fall through to PAM if credential is null.
	\end{itemize}
	\end{itemize}
	\end{center}
	\end{slide}


	\begin{slide}
	\slideheading{\slurmd\ Initialization}
	\begin{center}
	\begin{itemize}
	\item On startup, \slurmd\ will generate a private key and
	attempt to contact slurm controller to join the cluster.
	\item If slurm controller is not up at time of \slurmd\ initialization,
	\slurmd\ will sleep and listen on a well-defined mongo-port for status
	requests from controller, or other entity.
	\item On first contact with controller, \slurmd\ will exchange secret key
	by setting flag {\tt REFRESH\_KEY} in the corresponding message.
	\item After private key has been exchanged, \slurmd\ will wait for
	further work
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{ Interactive Job Initiation }
	\begin{center}
	\begin{itemize}
	\item For an interactive job, \srun\ will build an allocate request
	to send to the controller, based upon user options.
	\item the {\tt MSG\_ALLOCATE\_REQ} message will contain the
	following information:
	\begin{itemize}
	\item immediate or block (flag)
	\item partition
	\item user info (username and/or uid)
	\item nprocs, nnodes, ncpus\_per\_task
	\item task distribution flag (block$|$cyclic)
	\end{itemize}
	optional:
	\begin{itemize}
	\item requested time limit
	\item constraint and requested feature list
	\end{itemize}
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Interactive Job Initiation, cont'd ...}
	\begin{center}
	\begin{itemize}
	\item \srun\ initializes mongo with shared mongo-key and
	sends allocate request to controller.
	\item controller replies with an allocate reply message:
	\item message type MSG\_ALLOCATE\_REPLY contains:
	\begin{itemize}
	\item return and error code
	\item node list
	\item credential list (1 per node)
	\item cpus per node
	\end{itemize}
	\item if return code indicates an error, \srun\ will print
	text representation of error code on stderr and exit
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Interactive Job Initiation, cont'd ...}
	\begin{center}
	\begin{itemize}
	\item If stdout/err is to be copied to user's terminal, \srun\ initializes
	two mongo-ports for every task and begins listening on these ports
	\item \srun\ similarly initializes mongo-ports for stdin.
	\item \srun\ formulates the list of nodes, credentials, and task
	assignments into a MSG\_JOB\_RUN\_REQ message.
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Interactive Job Initiation: job\_run\_request}
	\begin{center}
	\begin{itemize}
	\item MSG\_JOB\_RUN\_REQ contents:
	\begin{itemize}
	\item job credential
	\item jobid
	\item command line
	\item user info (username and/or uid)
	\item ntasks (number of tasks to initiate on this node)
	\item stdin, stdout, stderr locations (mongo-ports or files)
	(per task)
	\end{itemize}
	Optional contents (prefixed by id):
	\begin{itemize}
	\item cwd
	\item environment
	\item stop time
	\item signal handling
	\end{itemize}
	\end{itemize}
	\end{center}
	\end{slide}


	\begin{slide}
	\slideheading{Interactive Job Initiation: cont'd ... }
	\begin{center}
	\begin{itemize}
	\item One job run request message is built per node - \srun\
	sends these messages to all nodes using a {\tt mongo\_sendto:}\\
	\begin{verbatim} md = mongo_sendto <host>:<mongo_port>, &addr
	\end{verbatim}
	\item \srun\ then waits for reply messages from all \slurmd 's
	with {\tt mongo\_recvfrom} or {\tt mongo\_select}
	\item After a timeout, \srun\ sends status update to \slurmctld\
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Interactive Job Initiation: \slurmd}
	\begin{center}
	\begin{itemize}
	\item \slurmd\ receives a message from \srun\ and spawns a
	request thread ({\tt req\_thr})
	\item {\tt req\_thr} reads message header, sees that the message is
	of type MSG\_JOB\_RUN\_REQ and executes the {\tt job\_thr()}
	function.
	\item {\tt job\_thr} unpacks message body, then:
	\begin{itemize}
	\item if credential is non-null, decrypt credential to authorize job
	\item null credential: fall through to PAM
	\item if job and user are authorized, instantiate a task thread for
	each task requested and send MSG\_JOB\_RUN\_REPLY.
	\end{itemize}
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Interactive Job Initiation: \slurmd }
	\begin{center}
	\begin{itemize}
	\item {\tt task\_thr} examines std\{in,err,out\} locations for
	this task and opens appropriately
	\begin{itemize}
	\item {\tt /dev/null}
	\item host:mongo port
	\item filename
	\end{itemize}
	\item {\tt task\_thr} dups stdout/err/in fds as appropriate.
	\item Each {\tt task\_thr } then does fork(), setuid(), chdir()
	(if req'd), setup environment, and exec() of the user's executable.
	\item {\tt task\_thr} will then wait() for its child.
	\item {\tt job\_thr} continues to listen for job control information
	over original connection (signals, cancel, etc.)
	\item {\tt job\_thr} on node 0 of job will monitor connection to
	\srun\ and will initiate a new \srun\ on local node if
	necessary.
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Interactive Job Initiation, cont'd ...}
	\begin{center}
	\begin{itemize}
	\item \srun\ remains attached to user's terminal, copying stderr
	and stdout (optionally prepending task no) and forwarding signals
	\item \srun\ transparently forwards signals generated on the
	user's terminal to remote tasks (MSG\_SIGNAL)
	\item \srun\ also receives status information on original channel:
	\begin{itemize}
	\item MSG\_TASK\_EXITED
	\end{itemize}
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Batch Job Initiation}
	\begin{center}
	\begin{itemize}
	\item a batch submission consists of a script that will be invoked
	on only one node in the allocation
	\item \srun\ sends MSG\_BATCH\_SUBMIT\_REQ to \slurmctld , which contains
	\begin{itemize}
	\item user info
	\item nnodes, nprocs, cpus\_per\_task
	\item script path, environment, cwd
	\end{itemize}
	Optional parameters:
	\begin{itemize}
	\item requested timelimit (default set by config)
	\item filenames for stdout/err (default jobid.o,e)
	\item stdin location (default {\tt /dev/null})
	\item signal handling
	\end{itemize}
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Batch Job Initiation, cont'd ...}
	\begin{center}
	\begin{itemize}
	\item \slurmctld\ sends MSG\_JOB\_RUN\_REQ to first node in job
	when it has allocated nodes
	\item Job run request contents:
	\begin{itemize}
	\item credential, jobid, userinfo
	\item stdin/out/err locations (always files)
	\item ntasks $=1$
	\item command line $=$ ``{\tt /path/to/script args}''
	\item stop time
	\end{itemize}
	\item \slurmd\ handles job run request as previously explained
	\item \slurmctld\ will keep connection open for this job, waiting
	for task exit message, at which point job is complete.
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Batch Job Initiation, cont'd ...}
	\begin{center}
	\begin{itemize}
	\item Presence of SLURM\_JOBID env var indicates to \srun\ that it
	is running under an allocation (has access to resources).
	\item SLURM\_HOSTLIST will also be set to the list of nodes allocated
	\item \srun\ must query \slurmctld , however, for two reasons:
	\begin{itemize}
	\item \srun\ doesn't know number of cpus for each node
	\item safer for \srun\ to obtain switch resources from controller
	\end{itemize}
	\item Therefore, \srun\ will first query \slurmctld\ for details of
	its allocation
	\item \srun\ will then process command line options relative to this
	allocation. If a subset of nodes is specified, \srun\ will need
	to requery the controller for switch resources. (We may be
	able to get away without doing this 2\textsuperscript{nd} query)
	\end{itemize}
	\end{center}
	\end{slide}

	\begin{slide}
	\slideheading{Allocate Mode}
	\begin{center}
	\begin{itemize}
	\item Allocates a set of resources to the
	user then spawns a shell with access to those resources.
	(SLURM\_JOBID and SLURM\_HOSTLIST are set)
	\item initiating \srun\ will need to {\tt wait()} for exit
	of the shell to notify \slurmctld\ that job is complete.
	\item subsequent invocations of \srun\ behave as in the batch run
	case
	\end{itemize}
	\end{center}
	\end{slide}

	%\begin{slide}
	% \slideheading{Attaching to a Job}
	% \begin{center}
	% \begin{itemize}
	% \item User specifies job name or jobid on command line
	% \item \srun\ connects to \slurmctld\ and sends a MSG\_JOB\_ATTACH\_REQ:
	% \begin{itemize}
	% \item job id
	% \item job name
	% \end{itemize}
	% \item MSG\_JOB\_ATTACH\_REPLY contains similar information as
	% MSG\_ALLOCATE\_REPLY
	% \item \srun\ now has credentials to contact \slurmds\ in job
	% \end{itemize}
	% \end{center}
	%\end{slide}

	%\begin{slide}
	% \slideheading{}
	% \begin{center}
	% \begin{itemize}
	% \end{itemize}
	% \end{center}
	%\end{slide}

	\end{document}