|  | #!/usr/bin/perl | 
|  | ############################################################################### | 
|  | # | 
|  | #  sjstat - List attributes of jobs under SLURM control | 
|  | # | 
|  | ############################################################################### | 
|  | #  Copyright (C) 2007 The Regents of the University of California. | 
|  | #  Copyright (C) 2008-2009 Lawrence Livermore National Security. | 
|  | #  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | 
|  | #  Written by Phil Eckert <eckert21@llnl.gov>. | 
|  | #  CODE-OCEC-09-009. All rights reserved. | 
|  | # | 
|  | #  This file is part of Slurm, a resource management program. | 
|  | #  For details, see <https://slurm.schedmd.com/>. | 
|  | #  Please also read the included file: DISCLAIMER. | 
|  | # | 
|  | #  SLURM is free software; you can redistribute it and/or modify it under | 
|  | #  the terms of the GNU General Public License as published by the Free | 
|  | #  Software Foundation; either version 2 of the License, or (at your option) | 
|  | #  any later version. | 
|  | # | 
|  | #  In addition, as a special exception, the copyright holders give permission | 
|  | #  to link the code of portions of this program with the OpenSSL library under | 
|  | #  certain conditions as described in each individual source file, and | 
|  | #  distribute linked combinations including the two. You must obey the GNU | 
|  | #  General Public License in all respects for all of the code used other than | 
|  | #  OpenSSL. If you modify file(s) with this exception, you may extend this | 
|  | #  exception to your version of the file(s), but you are not obligated to do | 
|  | #  so. If you do not wish to do so, delete this exception statement from your | 
|  | #  version.  If you delete this exception statement from all source files in | 
|  | #  the program, then also delete it here. | 
|  | # | 
|  | #  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | #  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
|  | #  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more | 
|  | #  details. | 
|  | # | 
|  | #  You should have received a copy of the GNU General Public License along | 
|  | #  with SLURM; if not, write to the Free Software Foundation, Inc., | 
|  | #  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA. | 
|  | # | 
|  | #  Based off code with permission copyright 2006, 2007 Cluster Resources, Inc. | 
|  | ############################################################################### | 
|  |  | 
|  | # | 
|  | # Man page stuff. | 
|  | # | 
BEGIN {
	#
	#	Dump the man page in *roff format and exit if --roff was
	#	specified.  This runs at compile time, i.e. before any of the
	#	run-time statements of the script.  Arguments following a
	#	bare "--" are not inspected.
	#
	foreach my $arg (@ARGV) {
		last if ($arg eq "--");
		next unless ($arg eq "--roff");

		#
		#	Load Pod::Man on demand.  A "use" statement here would be
		#	hoisted to compile time and load the module on every
		#	invocation, whether or not --roff was given.
		#
		require Pod::Man;
		my $parser = Pod::Man->new(section => 1);
		$parser->parse_from_file($0, \*STDOUT);
		exit 0;
	}
}
|  |  | 
|  | use strict; | 
|  | use Getopt::Long 2.24 qw(:config no_ignore_case); | 
|  | use autouse 'Pod::Usage' => qw(pod2usage); | 
|  |  | 
|  | # | 
|  | #	Global Variables. | 
|  | # | 
#	Option flags set by get_options(), and the per-partition limits
#	filled in by do_scontrol_part() (keyed by partition name).
my ($help, $man, $pool, $running, $verbose);
my (%MaxNodes, %MaxTime);

#
#	Get user options first, so that -h, -man and usage errors are
#	handled even when SLURM is down or unreachable.
#
get_options();

#
#	Check SLURM status.
#
isslurmup();

#
#	Get partition information from scontrol, used
#	currently in conjunction with the sinfo data.
#
do_scontrol_part();

#
#	Get and display the sinfo data.
#
do_sinfo();

#
#	If the -c option was entered, stop here.
#
exit if ($pool);

#
#	Get and display the squeue data.
#
do_squeue();

exit;
|  |  | 
|  |  | 
|  | # | 
|  | # Get the SLURM partitions information. | 
|  | # | 
sub do_sinfo
{
	#
	#	Run sinfo and print the "Scheduling pool data" table.
	#	One record is collected per sinfo output line; in verbose
	#	mode the node and time limits come from %MaxNodes/%MaxTime.
	#
	my @records;

	#
	#	Get the partition and node info.
	#
	my $format = "\"%9P %7m %.4c %.22F %f\"";

	foreach my $row (`sinfo -e -o $format`) {
		next if ($row =~ /^PARTITION/);
		chomp $row;
		my ($name, $mem, $cpus, $counts, $traits) = split(' ', $row);

		#
		#	The node count column has four "/" separated values.
		#
		my ($active, $idle, $out, $total) = split(/\//, $counts);

		$traits = '' unless (defined $traits);
		$traits .= " ";
		$traits =~ s/\(null\)//g;

		push(@records, {
			name   => $name,
			mem    => $mem,
			cpus   => $cpus,
			active => $active,
			idle   => $idle,
			out    => $out,
			total  => $total,
			usable => $total - $out,
			traits => $traits,
		});
	}

	#
	#	Table header.
	#
	print("\nScheduling pool data:\n");
	if ($verbose) {
		print(
		    "----------------------------------------------------------------------------------\n",
		    "                           Total  Usable   Free   Node   Time      Other          \n",
		    "Pool         Memory  Cpus  Nodes   Nodes  Nodes  Limit  Limit      traits         \n",
		    "----------------------------------------------------------------------------------\n");
	} else {
		print(
		    "-------------------------------------------------------------\n",
		    "Pool        Memory  Cpus  Total Usable   Free  Other Traits  \n",
		    "-------------------------------------------------------------\n");
	}

	#
	#	Table body.
	#
	foreach my $rec (@records) {
		unless ($verbose) {
			printf("%-9s %7dMb %5s %6s %6s %6s  %-s\n",
			    $rec->{name}, $rec->{mem}, $rec->{cpus},
			    $rec->{total}, $rec->{usable},
			    $rec->{idle}, $rec->{traits});
			next;
		}

		#
		#	The limit hashes are keyed by the partition name
		#	without the first "*" that sinfo may have added.
		#
		my $key = $rec->{name};
		$key =~ s/\*//;
		printf("%-9s  %7dMb %5s %6s %7s %6s %6s %10s  %-s\n",
		    $rec->{name}, $rec->{mem}, $rec->{cpus},
		    $rec->{total}, $rec->{usable},
		    $rec->{idle}, $MaxNodes{$key},
		    $MaxTime{$key}, $rec->{traits});
	}
	print("\n");

	return;
}
|  |  | 
|  |  | 
|  | # | 
|  | # Get the SLURM queues. | 
|  | # | 
sub do_squeue
{
	#
	#	Run squeue and print the "Running job data" table.
	#	Honors the file-level $running (skip pending jobs) and
	#	$verbose (extra time columns) flags.  Calls convert_time()
	#	to reformat job time limits.
	#
	my @jobs;

	#
	#	Base options on whether this partition is node or process
	#	scheduled.  Both the cons_res and the cons_tres select plugins
	#	are treated as process scheduled; grep exits non-zero when
	#	neither name appears in the configuration.
	#
	my ($type, $options);
	my $rval = system("scontrol show config | grep -e cons_res -e cons_tres >> /dev/null");
	if ($rval) {
		$type = "Nodes";
		$options =  "\"%8i  %8u %.6D %2t %S %.12l  %.9P %.11M  %1000R\"";
	} else {
		$type = "Procs";
		$options =  "\"%8i  %8u %.6C %2t %S %.12l  %.9P %.11M  %1000R\"";
	}

	#
	#	Get the job information.
	#
	foreach my $tmp (`squeue -o $options`) {
		next if ($tmp =~ /^JOBID/);
		next if ($running && $tmp =~ / PD /);
		chomp $tmp;
		my @line = split(' ', $tmp);
		next unless (@line);		# ignore blank lines

		#
		#	Drop the first five characters of the start time
		#	(e.g. "2009-01-14T18:11:22" becomes "01-14T18:11:22");
		#	pending jobs have no start time to show.
		#
		$line[4] =~ s/^.....//;
		$line[4] = "N/A" if ($line[3] =~ /PD/);

		my $limit = $line[5];
		$limit = convert_time($limit) unless ($limit eq "UNLIMITED");

		#
		#	Only keep the master node from the nodes list.
		#
		$line[8] =~ s/\[([0-9.]*).*/$1/;

		push(@jobs, {
			job    => $line[0],
			user   => $line[1],
			nodes  => $line[2],
			status => $line[3],
			begin  => $line[4],
			limit  => $limit,
			pool   => $line[6],
			used   => $line[7],
			master => $line[8],
		});
	}

	print("Running job data:\n");

	if ($verbose) {
		print("---------------------------------------------------------------------------------------------------\n");
		print("                                                 Time        Time            Time                  \n");
		print("JobID    User      $type Pool      Status        Used       Limit         Started  Master/Other    \n");
		print("---------------------------------------------------------------------------------------------------\n");
	} else {
		print("----------------------------------------------------------------------\n");
		print("JobID    User      $type Pool      Status        Used  Master/Other   \n");
		print("----------------------------------------------------------------------\n");
	}

	foreach my $j (@jobs) {
		if ($verbose) {
			printf("%-8s %-8s %6s %-9s %-7s %10s %11s  %14s  %.12s\n",
			    $j->{job}, $j->{user}, $j->{nodes},
			    $j->{pool}, $j->{status},
			    $j->{used}, $j->{limit}, $j->{begin},
			    $j->{master});
		} else {
			printf("%-8s %-8s %6s %-9s %-7s %10s  %.12s\n",
			    $j->{job}, $j->{user}, $j->{nodes},
			    $j->{pool}, $j->{status},
			    $j->{used}, $j->{master});
		}
	}
	print("\n");

	return;
}
|  |  | 
|  | # | 
|  | # Get the SLURM partitions. | 
|  | # | 
sub do_scontrol_part
{
	#
	#	Read "scontrol show part" and record, per partition, the
	#	MaxTime and MaxNodes values in %MaxTime and %MaxNodes.
	#	A value of UNLIMITED is shortened to UNLIM.
	#
	my $part;
	foreach my $tmp (`scontrol show part`) {
		chomp $tmp;

		#
		#	A PartitionName= field starts a new partition; the
		#	limits that follow are filed under that name.
		#
		$part = $1 if ($tmp =~ m/PartitionName=(\S+)/);

		#
		#	Ignore anything seen before the first partition name.
		#
		next unless (defined $part);

		#
		#	The value may be the last thing on the line, so do not
		#	require whitespace after it.
		#
		if ($tmp =~ m/\bMaxTime=(\S+)/) {
			$MaxTime{$part} = $1;
			$MaxTime{$part} =~ s/UNLIMITED/UNLIM/;
		}
		if ($tmp =~ m/\bMaxNodes=(\S+)/) {
			$MaxNodes{$part} = $1;
			$MaxNodes{$part} =~ s/UNLIMITED/UNLIM/;
		}
	}

	return;
}
|  |  | 
|  |  | 
|  | # | 
|  | # Show the man page. | 
|  | # | 
sub show_man
{
	#
	#	Display the embedded POD via pod2usage.  Refuses to run
	#	for root: the real uid is switched to an unprivileged one,
	#	a message is printed and the script exits with status 1.
	#
	if ($< == 0) {    # Cannot invoke perldoc as root
		my $uid = eval { getpwnam("nobody") };
		unless (defined $uid) {
			$uid = eval { getpwnam("nouser") };
		}
		unless (defined $uid) {
			$uid = -2;
		}
		$< = $uid;
		print("\n You can not do this as root!\n\n");
		exit 1;
	}

	#
	#	Disengage setuid, then clean up the environment.
	#
	$> = $<;
	$ENV{PATH} = "/bin:/usr/bin";    # Untaint PATH
	foreach my $name ('IFS', 'CDPATH', 'ENV', 'BASH_ENV') {
		delete $ENV{$name};
	}

	#
	#	Untaint $0, refusing anything outside the accepted set.
	#
	unless ($0 =~ /^([-\/\w\.]+)$/) {
		die "Illegal characters were found in \$0 ($0)\n";
	}
	$0 = $1;

	pod2usage(-exitstatus => 0, -verbose => 2);

	return;
}
|  |  | 
|  |  | 
|  | # | 
|  | # Convert the time to a better format. | 
|  | # | 
sub convert_time
{
	#
	#	A value that splits into four fields on "-" and ":" is
	#	returned as hours:minutes:seconds, with the first field
	#	multiplied by 24 and added to the second.  Any other value
	#	is returned right justified in a field of 8 characters.
	#
	my ($raw) = @_;

	my @parts = split(/[-:]/, $raw);
	unless (scalar(@parts) == 4) {
		return(sprintf("%8s", $raw));
	}

	my ($days, $hours, $minutes, $seconds) = @parts;
	my $total_hours = ($days * 24) + $hours;

	return(join(':', $total_hours, $minutes, $seconds));
}
|  |  | 
|  |  | 
|  | # | 
|  | # Get options. | 
|  | # | 
sub get_options
{
	#
	#	Parse the command line into the file-level option flags.
	#	A parse failure shows the usage with exit status 1; -man
	#	and -h are acted on right away.
	#
	my @spec = (
		'help|h|?' => \$help,
		'man'      => \$man,
		'v'        => \$verbose,
		'r'        => \$running,
		'c'        => \$pool,
	);

	unless (GetOptions(@spec)) {
		usage(1);
	}

	if ($man) {
		show_man();
	}
	if ($help) {
		usage(0);
	}

	return;
}
|  |  | 
|  |  | 
|  | # | 
|  | # Usage. | 
|  | # | 
sub usage
{
	#
	#	Print usage instructions and exit.
	#
	#	$eval is the exit status.  The whole message goes to one
	#	stream: STDOUT when the status is 0 (help was requested),
	#	STDERR otherwise (the command line was in error).
	#
	my $eval = shift(@_);

	my $out = $eval ? \*STDERR : \*STDOUT;

	print {$out} "\nUsage: sjstat [-h] [-c] [-man] [-r] [-v]\n";

	#
	#	Plain print, not printf: this text is not a format string.
	#
	print {$out} "\n",
	    "-h\tshows usage.\n",
	    "-c\tshows computing resources info only.\n",
	    "-man\tshows man page.\n",
	    "-r\tshow only running jobs.\n",
	    "-v\tis for the verbose mode.\n",
	    "\n",
	    "\n",
	    "Output is very similar to that of squeue.\n",
	    "\n",
	    "\n";

	exit($eval);
}
|  |  | 
|  |  | 
|  | # | 
|  | # Determine if SLURM is available. | 
|  | # | 
sub isslurmup
{
	#
	#	Probe SLURM by running "scontrol show part".  The command
	#	output is captured only to keep it off the terminal; the
	#	exit status alone decides.  On failure a message is printed
	#	and the script exits with status 1.
	#
	my $discard = `scontrol show part 2>&1`;

	return unless ($?);

	print("\n SLURM is not communicating.\n\n");
	exit(1);
}
|  |  | 
|  |  | 
|  | __END__ | 
|  |  | 
|  | =head1 NAME | 
|  |  | 
|  | B<sjstat> - List attributes of jobs under SLURM control | 
|  |  | 
|  | =head1 SYNOPSIS | 
|  |  | 
|  | B<sjstat> [B<-h>] [B<-c>] [B<-man>] [B<-r>] [B<-v>] | 
|  |  | 
|  | =head1 DESCRIPTION | 
|  |  | 
|  | The B<sjstat> command is used to display statistics of jobs under control of SLURM. | 
|  | The output is designed to give information on the resource usage and availability, | 
|  | as well as information about jobs that are currently active on the machine. This output | 
|  | is built using the SLURM utilities sinfo, squeue and scontrol; the man pages for these | 
|  | utilities will provide more information and greater depth of understanding. | 
|  |  | 
|  | =head1 OPTIONS | 
|  |  | 
|  | =over 4 | 
|  |  | 
|  | =item B<-h> | 
|  |  | 
|  | Display a brief help message | 
|  |  | 
|  | =item B<-c> | 
|  |  | 
|  | Display the computing resource information only. | 
|  |  | 
|  | =item B<-man> | 
|  |  | 
|  | Show the man page. | 
|  |  | 
|  | =item B<-r> | 
|  |  | 
|  | Display only the running jobs. | 
|  |  | 
|  | =item B<-v> | 
|  |  | 
|  | Display more verbose information. | 
|  |  | 
|  | =back | 
|  |  | 
|  | =head1 EXAMPLE | 
|  |  | 
|  | The following is a basic request for status. | 
|  |  | 
|  | > sjstat | 
|  |  | 
|  | Scheduling pool data: | 
|  | ------------------------------------------------------------ | 
|  | Pool         Memory  Cpus  Total Usable   Free  Other Traits | 
|  | ------------------------------------------------------------ | 
|  | pdebug      15000Mb     8     32     32     24  (null) | 
|  | pbatch*     15000Mb     8   1072   1070    174  (null) | 
|  |  | 
|  |  | 
|  | Running job data: | 
|  | ------------------------------------------------------------------- | 
|  | JobID    User      Nodes Pool       Status        Used Master/Other | 
|  | ------------------------------------------------------------------- | 
|  | 395      mary       1000 pbatch     PD            0:00 (JobHeld) | 
|  | 396      mary       1000 pbatch     PD            0:00 (JobHeld) | 
|  | 375      sam        1000 pbatch     CG            0:00 (JobHeld) | 
|  | 388      fred         32 pbatch     R            25:27 atlas89 | 
|  | 361      harry       512 pbatch     R          1:01:12 atlas618 | 
|  | 1077742  sally         8 pdebug     R            20:16 atlas18 | 
|  |  | 
|  |  | 
|  | The Scheduling data contains information pertaining to the: | 
|  |  | 
|  | Pool  	  a set of nodes | 
|  | Memory	  the amount of memory on each node | 
|  | Cpus	  the number of cpus on each node | 
|  | Total	  the total number of nodes in the pool | 
|  | Usable	  total usable nodes in the pool | 
|  | Free	  total nodes that are currently free | 
|  |  | 
|  | The Running job data contains information pertaining to the: | 
|  |  | 
|  | JobID		the SLURM job id | 
|  | User		owner of the job | 
|  | Nodes		nodes required, or in use by the job | 
|  | (Note: On cpu scheduled machines, this field | 
|  | will be labeled "Procs" and will show the number of | 
|  | processors the job is using.) | 
|  | Pool 		the Pool  required or in use by the job | 
|  | Status		current status of the job | 
|  | Used 		Wallclock time used by the job | 
|  | Master/Other 	Either the Master (head) node used by the job, or may | 
|  | indicate further status of a pending, or completing job. | 
|  |  | 
|  | The common status values are: | 
|  |  | 
|  | R	The job is running | 
|  | PD	The job is Pending | 
|  | CG	The job is Completing | 
|  |  | 
|  | These are states reported by SLURM and more elaborate documentation | 
|  | can be found in the squeue/sinfo man pages. | 
|  |  | 
|  |  | 
|  | An example of the -v option. | 
|  |  | 
|  | Scheduling pool data: | 
|  | ----------------------------------------------------------------------------- | 
|  | Total  Usable   Free   Node   Time  Other | 
|  | Pool         Memory  Cpus  Nodes   Nodes  Nodes  Limit  Limit  Traits | 
|  | ----------------------------------------------------------------------------- | 
|  | pdebug      15000Mb     8     32      32     24     16     30  (null) | 
|  | pbatch*     15000Mb     8   1072    1070    174  UNLIM  UNLIM  (null) | 
|  |  | 
|  | Running job data: | 
|  | --------------------------------------------------------------------------------------------------- | 
|  | Time        Time            Time | 
|  | JobID    User      Nodes Pool      Status        Used       Limit         Started  Master/Other | 
|  | --------------------------------------------------------------------------------------------------- | 
|  | 38562    tom           4 pbatch    PD            0:00     1:00:00  01-14T18:11:22  (JobHeld) | 
|  |  | 
|  | The added fields to the "Scheduling pool data" are: | 
|  |  | 
|  | Node Limit	SLURM imposed node limit. | 
|  | Time Limit	SLURM imposed time limit, value in minutes. | 
|  |  | 
|  | The added fields to the "Running job data" are: | 
|  |  | 
|  | Limit		Time limit of job. | 
|  | Start		Start time of job. | 
|  |  | 
|  | =head1 REPORTING BUGS | 
|  |  | 
|  | Report bugs to <eckert2@llnl.gov> | 
|  |  | 
|  | =cut |