| #! /usr/bin/perl -w |
| ############################################################################### |
| # |
| # pbsnodes - queries slurm nodes in familar pbs format. |
| # |
| # |
| ############################################################################### |
| # Copyright (C) 2007 The Regents of the University of California. |
| # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| # Written by Danny Auble <auble1@llnl.gov>. |
| # CODE-OCEC-09-009. All rights reserved. |
| # Additions by Troy Baer <tbaer@utk.edu> |
| # |
| # This file is part of SLURM, a resource management program. |
| # For details, see <http://slurm.schedmd.com/>. |
| # Please also read the included file: DISCLAIMER. |
| # |
| # SLURM is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # In addition, as a special exception, the copyright holders give permission |
| # to link the code of portions of this program with the OpenSSL library under |
| # certain conditions as described in each individual source file, and |
| # distribute linked combinations including the two. You must obey the GNU |
| # General Public License in all respects for all of the code used other than |
| # OpenSSL. If you modify file(s) with this exception, you may extend this |
| # exception to your version of the file(s), but you are not obligated to do |
| # so. If you do not wish to do so, delete this exception statement from your |
| # version. If you delete this exception statement from all source files in |
| # the program, then also delete it here. |
| # |
| # SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with SLURM; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| # |
| # Based off code with permission copyright 2006, 2007 Cluster Resources, Inc. |
| ############################################################################### |
| |
| |
| use strict; |
| use FindBin; |
| use Getopt::Long 2.24 qw(:config no_ignore_case); |
| use lib "${FindBin::Bin}/../lib/perl"; |
| use autouse 'Pod::Usage' => qw(pod2usage); |
| use Slurm ':all'; |
| use Switch; |
| |
| Main: |
| { |
| # Parse Command Line Arguments |
| my ($all, $clear, $help, $man, $shortlist, $printnote, $offline, $reset, $setnote); |
| GetOptions( |
| 'all|a' => \$all, |
| 'clear|c' => \$clear, |
| 'help|?' => \$help, |
| 'list|l' => \$shortlist, |
| 'man' => \$man, |
| 'note|n' => \$printnote, |
| 'offline|o' => \$offline, |
| 'reset|r' => \$reset, |
| 'setnote|N=s' => \$setnote |
| ) |
| or pod2usage(2); |
| |
| # Display usage if necessary |
| pod2usage(0) if $help; |
| if ($man) |
| { |
| if ($< == 0) # Cannot invoke perldoc as root |
| { |
| my $id = eval { getpwnam("nobody") }; |
| $id = eval { getpwnam("nouser") } unless defined $id; |
| $id = -2 unless defined $id; |
| $< = $id; |
| } |
| $> = $<; # Disengage setuid |
| $ENV{PATH} = "/bin:/usr/bin"; # Untaint PATH |
| delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; |
| if ($0 =~ /^([-\/\w\.]+)$/) { $0 = $1; } # Untaint $0 |
| else { die "Illegal characters were found in \$0 ($0)\n"; } |
| pod2usage(-exitstatus => 0, -verbose => 2); |
| } |
| |
| # Use sole remaining argument as nodeIds |
| my @nodeIds = @ARGV; |
| my $slurm = Slurm::new(); |
| |
| |
| # handle all of the node update operations |
| if ( defined $clear || defined $offline || defined $reset ) { |
| my $oprc = 0; |
| foreach my $node (@nodeIds) { |
| my $nodestate; |
| if ( defined $clear || defined $reset ) { |
| $nodestate = {node_names=>$node,node_state=>NODE_RESUME}; |
| } elsif ( defined $offline ) { |
| $nodestate = {node_names=>$node,node_state=>NODE_STATE_DRAIN}; |
| } |
| if ( defined $setnote ) { |
| $nodestate->{'reason'} = $setnote; |
| } |
| my $rc = $slurm->update_node($nodestate); |
| if ( $rc!=0 ) { |
| $oprc += $rc; |
| } |
| } |
| exit($oprc); |
| } |
| |
| # if we've gotten to this point, we're doing some kind of list operation |
| my $resp = $slurm->load_node(0, SHOW_ALL); |
| if(!$resp) { |
| die "Problem loading node.\n"; |
| } |
| |
| my $update = $resp->{last_update}; |
| foreach my $node (@{$resp->{node_array}}) { |
| #print STDERR join(",",keys($node))."\n"; |
| my $nodeId = $node->{'name'}; |
| my $rCProc = $node->{'cpus'}; |
| my $rBoards = $node->{'boards'}; |
| my $rSockets = $node->{'sockets'}; |
| my $rCores = $node->{'cores'}; |
| my $rThreads = $node->{'threads'}; |
| my $features = $node->{'features'}; |
| my $rAMem = $node->{'real_memory'}; |
| my $rAProc = ($node->{'cpus'} - |
| ($node->{'alloc_cpus'} + $node->{'err_cpus'})); |
| my $state = lc(Slurm->node_state_string($node->{'node_state'})); |
| my $reason = $node->{'reason'}; |
| my $gres = $node->{'gres'}; |
| my $os = lc($node->{'os'}); |
| my $arch = $node->{'arch'}; |
| my $disksize = $node->{'tmp_disk'}; |
| |
| # deal w/ specific types of gres |
| my $gpus = 0; |
| my $mics = 0; |
| if ( defined $gres ) { |
| my @gres = split(/,/,$gres); |
| foreach my $grestype ( @gres ) { |
| my @elt = split(/:/,$grestype); |
| if ( $#elt>0 && $elt[0] eq "gpu" ) { |
| $gpus = int($elt[1]); |
| } |
| if ( $#elt>0 && $elt[0] eq "mic" ) { |
| $mics = int($elt[1]); |
| } |
| } |
| } |
| |
| # find job(s) on node |
| my $jobs; |
| if ( $state eq "allocated" ) { |
| # how to get list of jobs on node efficiently? |
| } |
| |
| # this isn't really defined in SLURM, so I am not sure how to get it |
| my $load; |
| |
| # mangle SLURM states into PBS equivs |
| my $pbsstate = $state; |
| $pbsstate =~ s/drained/offline/g; |
| $pbsstate =~ s/idle/free/g; |
| $pbsstate =~ s/\*//g; |
| if ( $state eq "allocated" ) { |
| if ( $rAProc>0 ) { |
| $pbsstate = "busy"; |
| } else { |
| $pbsstate = "job-exclusive"; |
| } |
| } |
| |
| # Filter nodes according to options and arguments |
| if (@nodeIds) { |
| next unless grep /^$nodeId/, @nodeIds; |
| } |
| |
| if ( !defined($shortlist) ) { |
| # Prepare variables |
| my @status = (); |
| push @status, "rectime=$update" if defined $update; |
| push @status, "jobs=$jobs" if defined $jobs; |
| push @status, "state=$pbsstate" if defined $pbsstate; |
| push @status, "slurmstate=$state" if defined $state; |
| push @status, "size=".(int($disksize)*1024)."kb:".(int($disksize)*1024)."kb" if defined $disksize; |
| push @status, "gres=$gres" if defined $gres; |
| push @status, "message=\"$reason\"" if defined $reason; |
| push @status, "loadave=" . sprintf("%.2f", $load) if defined $load; |
| push @status, "ncpus=${rCProc}" if defined $rCProc; |
| push @status, "boards=${rBoards}" if defined $rBoards; |
| push @status, "sockets=${rSockets}" if defined $rSockets; |
| push @status, "cores=${rCores}" if defined $rCores; |
| push @status, "threads=${rThreads}" if defined $rThreads; |
| push @status, "availmem=${rAMem}mb" if defined $rAMem; |
| push @status, "opsys=$os" if defined $os; |
| push @status, "arch=$arch" if defined $arch; |
| |
| # Print the node attributes |
| printf "%s\n", $nodeId; |
| printf " state = %s\n", $pbsstate; |
| printf " np = %s\n", $rCProc if $rCProc; |
| printf " properties = %s\n", join(' ', split(/:/, $features)) |
| if $features; |
| printf " ntype = cluster\n"; |
| printf " status = %s\n", join(',', @status) if @status; |
| printf " note = %s\n", $reason if defined $reason; |
| printf " gpus = %d\n", $gpus if $gpus>0; |
| printf " mics = %d\n", $mics if $mics>0; |
| print "\n"; |
| } else { |
| if ( $state =~ /drained|down/i ) { |
| printf "%s\t\t%s",$nodeId,$pbsstate; |
| printf "\t\t%s",$reason if ( defined $printnote && defined $reason ); |
| print "\n"; |
| } |
| } |
| } |
| |
| # Exit with status code |
| exit 0; |
| } |
| |
| |
| ############################################################################## |
| |
| __END__ |
| |
| =head1 NAME |
| |
| B<pbsnodes> - display and manipulate host information in a PBS-like format |
| |
| =head1 SYNOPSIS |
| |
| B<pbsnodes> [B<-a>] [I<node_id>...] |
| |
| B<pbsnodes> B<-l> [B<-n>] |
| |
| B<pbsnodes> B<-{c|r|o}> [I<node_id>...] [ B<-N> "note/reason string"] |
| |
| =head1 DESCRIPTION |
| |
| The B<pbsnodes> command displays and manipulates information about |
| nodes. |
| |
| =head1 OPTIONS |
| |
| =over 4 |
| |
| =item B<-a> |
| |
| Display information for all nodes. This is the default if no node name |
| is specified. |
| |
| =item B<-c> |
| |
| Clear OFFLINE from listed nodes. |
| |
| =item B<-l> |
| |
| List node names and their state for nodes that are DOWN, OFFLINE, or |
| UNKNOWN. |
| |
| =item B<-N> |
| |
| Specify a "note/reason" attribute. Use "" to clear field. |
| |
| =item B<-n> |
| |
| Show the "note/reason" attribute for nodes that are DOWN, OFFLINE, or |
| UNKNOWN. This option requires B<-l>. |
| |
| =item B<-r> |
| |
| Reset the listed nodes by clearing OFFLINE. Functionally equivalent |
| to B<-c>. |
| |
| =item B<-? | --help> |
| |
| brief help message |
| |
| =item B<--man> |
| |
| full documentation |
| |
| =back |
| |
| =cut |
| |