Merge branch 'slurm-2.3' into slurm-2.4
diff --git a/META b/META
index 10c7300..53c3578 100644
--- a/META
+++ b/META
@@ -8,10 +8,11 @@
Meta: 1
Name: slurm
Major: 2
- Minor: 3
+ Minor: 4
Micro: 5
- Version: 2.3.5
+ Version: 2.4.5
Release: 1
+
##
# When changing API_CURRENT update src/common/slurm_protocol_common.h
# with a new SLURM_PROTOCOL_VERSION signifing the old one and the version
@@ -20,6 +21,6 @@
# and _get_slurm_version()
# need to be updated also when changes are added also.
##
- API_CURRENT: 23
+ API_CURRENT: 24
API_AGE: 0
API_REVISION: 0
diff --git a/Makefile.am b/Makefile.am
index 3436f6c..14c42cc 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -7,9 +7,10 @@
etc/bluegene.conf.example \
etc/federation.conf.example \
etc/slurm.conf.example \
+ etc/slurmdbd.conf.example \
etc/slurm.epilog.clean \
- etc/init.d.slurm \
- etc/init.d.slurmdbd \
+ etc/init.d.slurm.in \
+ etc/init.d.slurmdbd.in \
etc/cgroup.conf.example \
etc/cgroup.release_common.example \
etc/cgroup_allowed_devices_file.conf.example \
@@ -30,7 +31,7 @@
pkginclude_HEADERS = \
slurm/pmi.h \
slurm/slurm.h \
- slurm/slurmdb.h \
+ slurm/slurmdb.h \
slurm/slurm_errno.h \
slurm/spank.h
diff --git a/Makefile.in b/Makefile.in
index 586a1df..5668dc0 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -43,6 +43,8 @@
$(top_srcdir)/contribs/perlapi/libslurm/perl/Makefile.PL.in \
$(top_srcdir)/contribs/perlapi/libslurmdb/perl/Makefile.PL.in \
$(top_srcdir)/contribs/phpext/slurm_php/config.m4.in \
+ $(top_srcdir)/etc/init.d.slurm.in \
+ $(top_srcdir)/etc/init.d.slurmdbd.in \
$(top_srcdir)/slurm/slurm.h.in AUTHORS COPYING ChangeLog \
INSTALL NEWS
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -93,7 +95,8 @@
CONFIG_CLEAN_FILES = config.xml \
contribs/perlapi/libslurm/perl/Makefile.PL \
contribs/perlapi/libslurmdb/perl/Makefile.PL \
- contribs/phpext/slurm_php/config.m4
+ contribs/phpext/slurm_php/config.m4 etc/init.d.slurm \
+ etc/init.d.slurmdbd
CONFIG_CLEAN_VPATH_FILES =
SOURCES =
DIST_SOURCES =
@@ -125,6 +128,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkgincludedir)"
HEADERS = $(pkginclude_HEADERS)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
@@ -139,9 +148,11 @@
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
am__remove_distdir = \
- { test ! -d "$(distdir)" \
- || { find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
- && rm -fr "$(distdir)"; }; }
+ if test -d "$(distdir)"; then \
+ find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+ && rm -rf "$(distdir)" \
+ || { sleep 5 && rm -rf "$(distdir)"; }; \
+ else :; fi
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
@@ -170,6 +181,8 @@
DIST_ARCHIVES = $(distdir).tar.gz
GZIP_ENV = --best
distuninstallcheck_listfiles = find . -type f -print
+am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
+ | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
distcleancheck_listfiles = find . -type f -print
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
@@ -248,6 +261,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -284,6 +298,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -378,9 +393,10 @@
etc/bluegene.conf.example \
etc/federation.conf.example \
etc/slurm.conf.example \
+ etc/slurmdbd.conf.example \
etc/slurm.epilog.clean \
- etc/init.d.slurm \
- etc/init.d.slurmdbd \
+ etc/init.d.slurm.in \
+ etc/init.d.slurmdbd.in \
etc/cgroup.conf.example \
etc/cgroup.release_common.example \
etc/cgroup_allowed_devices_file.conf.example \
@@ -401,7 +417,7 @@
pkginclude_HEADERS = \
slurm/pmi.h \
slurm/slurm.h \
- slurm/slurmdb.h \
+ slurm/slurmdb.h \
slurm/slurm_errno.h \
slurm/spank.h
@@ -416,7 +432,7 @@
$(MAKE) $(AM_MAKEFLAGS) all-recursive
.SUFFIXES:
-am--refresh:
+am--refresh: Makefile
@:
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
@@ -452,10 +468,8 @@
$(am__aclocal_m4_deps):
config.h: stamp-h1
- @if test ! -f $@; then \
- rm -f stamp-h1; \
- $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \
- else :; fi
+ @if test ! -f $@; then rm -f stamp-h1; else :; fi
+ @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
@rm -f stamp-h1
@@ -466,10 +480,8 @@
touch $@
slurm/slurm.h: slurm/stamp-h2
- @if test ! -f $@; then \
- rm -f slurm/stamp-h2; \
- $(MAKE) $(AM_MAKEFLAGS) slurm/stamp-h2; \
- else :; fi
+ @if test ! -f $@; then rm -f slurm/stamp-h2; else :; fi
+ @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) slurm/stamp-h2; else :; fi
slurm/stamp-h2: $(top_srcdir)/slurm/slurm.h.in $(top_builddir)/config.status
@rm -f slurm/stamp-h2
@@ -485,6 +497,10 @@
cd $(top_builddir) && $(SHELL) ./config.status $@
contribs/phpext/slurm_php/config.m4: $(top_builddir)/config.status $(top_srcdir)/contribs/phpext/slurm_php/config.m4.in
cd $(top_builddir) && $(SHELL) ./config.status $@
+etc/init.d.slurm: $(top_builddir)/config.status $(top_srcdir)/etc/init.d.slurm.in
+ cd $(top_builddir) && $(SHELL) ./config.status $@
+etc/init.d.slurmdbd: $(top_builddir)/config.status $(top_srcdir)/etc/init.d.slurmdbd.in
+ cd $(top_builddir) && $(SHELL) ./config.status $@
mostlyclean-libtool:
-rm -f *.lo
@@ -511,9 +527,7 @@
@$(NORMAL_UNINSTALL)
@list='$(pkginclude_HEADERS)'; test -n "$(pkgincludedir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- test -n "$$files" || exit 0; \
- echo " ( cd '$(DESTDIR)$(pkgincludedir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(pkgincludedir)" && rm -f $$files
+ dir='$(DESTDIR)$(pkgincludedir)'; $(am__uninstall_files_from_dir)
# This directory's subdirectories are mostly independent; you can cd
# into them and run `make' without going through this Makefile.
@@ -722,7 +736,11 @@
$(am__remove_distdir)
dist-bzip2: distdir
- tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
+ tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
+ $(am__remove_distdir)
+
+dist-lzip: distdir
+ tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
$(am__remove_distdir)
dist-lzma: distdir
@@ -730,7 +748,7 @@
$(am__remove_distdir)
dist-xz: distdir
- tardir=$(distdir) && $(am__tar) | xz -c >$(distdir).tar.xz
+ tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
$(am__remove_distdir)
dist-tarZ: distdir
@@ -761,6 +779,8 @@
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lzma*) \
lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\
+ *.tar.lz*) \
+ lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
*.tar.xz*) \
xz -dc $(distdir).tar.xz | $(am__untar) ;;\
*.tar.Z*) \
@@ -780,6 +800,7 @@
&& am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build \
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
+ $(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
@@ -808,8 +829,16 @@
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
distuninstallcheck:
- @$(am__cd) '$(distuninstallcheck_dir)' \
- && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
+ @test -n '$(distuninstallcheck_dir)' || { \
+ echo 'ERROR: trying to run $@ with an empty' \
+ '$$(distuninstallcheck_dir)' >&2; \
+ exit 1; \
+ }; \
+ $(am__cd) '$(distuninstallcheck_dir)' || { \
+ echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
+ exit 1; \
+ }; \
+ test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
|| { echo "ERROR: files left after uninstall:" ; \
if test -n "$(DESTDIR)"; then \
echo " (check DESTDIR support)"; \
@@ -843,10 +872,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
@@ -935,8 +969,8 @@
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
all all-am am--refresh check check-am clean clean-generic \
clean-libtool ctags ctags-recursive dist dist-all dist-bzip2 \
- dist-gzip dist-lzma dist-shar dist-tarZ dist-xz dist-zip \
- distcheck distclean distclean-generic distclean-hdr \
+ dist-gzip dist-lzip dist-lzma dist-shar dist-tarZ dist-xz \
+ dist-zip distcheck distclean distclean-generic distclean-hdr \
distclean-libtool distclean-local distclean-tags \
distcleancheck distdir distuninstallcheck dvi dvi-am html \
html-am info info-am install install-am install-data \
diff --git a/NEWS b/NEWS
index 3671b13..abafea4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,512 @@
This file describes changes in recent versions of SLURM. It primarily
documents those changes that are of interest to users and admins.
+* Changes in SLURM 2.4.6
+========================
+ -- Correct WillRun authentication logic when issued for non-job owner.
+ -- BGQ - fix memory leak
+ -- BGQ - Fix to check block for action 'D' if it also has nodes in error.
+
+* Changes in SLURM 2.4.5
+========================
+ -- Cray - On job kill request, send SIGCONT, SIGTERM, wait KillWait and send
+ SIGKILL. Previously just sent SIGKILL to tasks.
+ -- BGQ - Fix issue when running srun outside of an allocation and only
+ specifying the number of tasks and not the number of nodes.
+ -- BGQ - validate correct ntasks_per_node
+ -- BGQ - when srun -Q is given make runjob be quiet
+ -- Modify use of OOM (out of memory protection) for Linux 2.6.36 kernel
+ or later. NOTE: If you were setting the environment variable
+ SLURMSTEPD_OOM_ADJ=-17, it should be set to -1000 for Linux 2.6.36 kernel
+ or later.
+ -- BGQ - Fix job step timeout to actually happen when done from within an
+ allocation.
+ -- Reset node MAINT state flag when a reservation's nodes or flags change.
+ -- Accounting - Fix issue where QOS usage was being zeroed out on a
+ slurmctld restart.
+ -- BGQ - Add 64 tasks per node as a valid option for srun when used with
+ overcommit.
+ -- BLUEGENE - With Dynamic layout mode - Fix issue where if a larger block
+ was already in error and isn't deallocating and underlying hardware goes
+ bad one could get overlapping blocks in error making the code assert when
+ a new job request comes in.
+ -- BGQ - handle pending actions on a block better when trying to deallocate it.
+ -- Accounting - Fixed issue where if nodenames have changed on a system and
+ you query against that with -N and -E you will get all jobs during that
+ time instead of only the ones running on -N.
+ -- BGP - Fix for HTC mode
+ -- Accounting - If a job start message fails to the SlurmDBD reset the db_inx
+ so it gets sent again. This isn't a major problem since the start will
+ happen when the job ends, but this does make things cleaner.
+ -- If an salloc is waiting for an allocation to happen and is canceled by the
+ user mark the state canceled instead of completed.
+ -- Fix issue in accounting if a user puts a '\' in their job name.
+ -- Accounting - Fix for if asking for users or accounts that were deleted
+ with associations get the deleted associations as well.
+ -- BGQ - Handle shared blocks that need to be removed and have jobs running
+ on them. This should only happen in extreme conditions.
+ -- Fix inconsistency for hostlists that have more than 1 range.
+ -- BGQ - Add mutex around recovery for the Real Time server to avoid hitting
+ DB2 so hard.
+ -- BGQ - If an allocation exists on a block that has a 'D' action on it fail
+ job on future step creation attempts.
+
+* Changes in SLURM 2.4.4
+========================
+ -- BGQ - minor fix to make build work in emulated mode.
+ -- BGQ - Fix if large block goes into error and the next highest priority jobs
+ are planning on using the block. Previously it would fail those jobs
+ erroneously.
+ -- BGQ - Fix issue when a cnode going to an error (not SoftwareError) state
+ with a job running or trying to run on it.
+ -- Execute slurm_spank_job_epilog when there is no system Epilog configured.
+ -- Fix for srun --test-only to work correctly with timelimits
+ -- BGQ - If a job goes away while still trying to free it up in the
+ database, and the job is running on a small block make sure we free up
+ the correct node count.
+ -- BGQ - Logic added to make sure a job has finished on a block before it is
+ purged from the system if its front-end node goes down.
+ -- Modify strigger so that a filter option of "--user=0" is supported.
+ -- Correct --mem-per-cpu logic for core or socket allocations with multiple
+ threads per core.
+ -- Fix for older < glibc 2.4 systems to use euidaccess() instead of eaccess().
+ -- BLUEGENE - Do not alter a pending job's node count when changing its
+ partition.
+ -- BGQ - Add functionality to make it so we track the actions on a block.
+ This is needed for when a free request is added to a block but there are
+ jobs finishing up so we don't start new jobs on the block since they will
+ fail on start.
+ -- BGQ - Fixed InactiveLimit to work correctly to avoid scenarios where a
+ user's pending allocation was started with srun and then for some reason
+ the slurmctld was brought down and while it was down the srun was removed.
+ -- Fixed InactiveLimit math to work correctly
+ -- BGQ - Add logic to make it so blocks can't use a midplane with a nodeboard
+ in error for passthrough.
+ -- BGQ - Make it so if a nodeboard goes in error any block using that midplane
+ for passthrough gets removed on a dynamic system.
+ -- BGQ - Fix for printing realtime server debug correctly.
+ -- BGQ - Cleaner handling of cnode failures when reported through the runjob
+ interface instead of through the normal method.
+ -- smap - spread node information across multiple lines for larger systems.
+ -- Cray - Defer salloc until after PrologSlurmctld completes.
+ -- Correction to slurmdbd communications failure handling logic, incorrect
+ error codes returned in some cases.
+
+* Changes in SLURM 2.4.3
+========================
+ -- Accounting - Fix so complete 32 bit numbers can be put in for a priority.
+ -- cgroups - fix if initial directory is non-existent SLURM creates it
+ correctly. Before the errno wasn't being checked correctly
+ -- BGQ - fixed srun when only requesting a task count and not a node count
+ to operate the same way salloc or sbatch did and assign a task per cpu
+ by default instead of task per node.
+ -- Fix salloc --gid to work correctly. Reported by Brian Gilmer
+ -- BGQ - fix smap to set the correct default MloaderImage
+ -- BLUEGENE - updated documentation.
+ -- Close the batch job's environment file when it contains no data to avoid
+ leaking file descriptors.
+ -- Fix sbcast's credential to last till the end of a job instead of the
+ previous 20 minute time limit. The previous behavior would fail for
+ large files 20 minutes into the transfer.
+ -- Return ESLURM_NODES_BUSY rather than ESLURM_NODE_NOT_AVAIL error on job
+ submit when required nodes are up, but completing a job or in exclusive
+ job allocation.
+ -- Add HWLOC_FLAGS so linking to libslurm works correctly
+ -- BGQ - If using backfill and a shared block is running at least one job
+ and a job comes through backfill and can fit on the block without ending
+ jobs don't set an end_time for the running jobs since they don't need to
+ end to start the job.
+ -- Initialize bind_verbose when using task/cgroup.
+ -- BGQ - Fix for handling backfill much better when sharing blocks.
+ -- BGQ - Fix for making small blocks on first pass if not sharing blocks.
+ -- BLUEGENE - Remove force of default conn_type instead of leaving NAV
+ when none are requested. The Block allocator sets it up temporarily so
+ this isn't needed.
+ -- BLUEGENE - Fix deadlock issue when dealing with bad hardware if using
+ static blocks.
+ -- Fix to mysql plugin during rollup to only query suspended table when jobs
+ reported some suspended time.
+ -- Fix compile with glibc 2.16 (Kacper Kowalik)
+ -- BGQ - fix for deadlock where a block has error on it and all jobs
+ running on it are preemptable by scheduling job.
+ -- proctrack/cgroup: Exclude internal threads from "scontrol list pids".
+ Patch from Matthieu Hautreux, CEA.
+ -- Memory leak fixed for select/linear when preempting jobs.
+ -- Fix if updating begin time of a job to update the eligible time in
+ accounting as well.
+ -- BGQ - make it so you can signal steps when signaling the job allocation.
+ -- BGQ - Remove extra overhead if a large block has many cnode failures.
+ -- Priority/Multifactor - Fix issue with age factor when a job is estimated to
+ start in the future but is able to run now.
+ -- CRAY - update to work with ALPS 5.1
+ -- BGQ - Handle issue of speed and mutexes when polling instead of using the
+ realtime server.
+ -- BGQ - Fix minor sorting issue with sview when sorting by midplanes.
+ -- Accounting - Fix for handling per user max node/cpus limits on a QOS
+ correctly for current job.
+ -- Update documentation for -/+= when updating a reservation's
+ users/accounts/flags
+ -- Update pam module to work if using aliases on nodes instead of actual
+ host names.
+ -- Correction to task layout logic in select/cons_res for job with minimum
+ and maximum node count.
+ -- BGQ - Put final poll after realtime comes back into service to avoid
+ having the realtime server go down over and over again while waiting
+ for the poll to finish.
+ -- task/cgroup/memory - ensure that ConstrainSwapSpace=no is correctly
+ handled. Work by Matthieu Hautreux, CEA.
+ -- CRAY - Fix for sacct -N option to work correctly
+ -- CRAY - Update documentation to describe installation from rpm instead
+ of the previous piecemeal method.
+ -- Fix sacct to work with QOS' that have previously been deleted.
+ -- Added all available limits to the output of sacctmgr list qos
+
+* Changes in SLURM 2.4.2
+========================
+ -- BLUEGENE - Correct potential deadlock issue when hardware goes bad and
+ there are jobs running on that hardware.
+ -- If a job is submitted to more than one partition, its partition pointer can
+ be set to an invalid value. This can result in the count of CPUs allocated
+ on a node being bad, resulting in over- or under-allocation of its CPUs.
+ Patch by Carles Fenoy, BSC.
+ -- Fix bug in task layout with select/cons_res plugin and --ntasks-per-node
+ option. Patch by Martin Perry, Bull.
+ -- BLUEGENE - remove race condition where if a block is removed while waiting
+ for a job to finish on it the number of unused cpus wasn't updated
+ correctly.
+ -- BGQ - make sure we have a valid block when creating or finishing a step
+ allocation.
+ -- BLUEGENE - If a large block (> 1 midplane) is in error and underlying
+ hardware is marked bad remove the larger block and create a block over
+ just the bad hardware making the other hardware available to run on.
+ -- BLUEGENE - Handle job completion correctly if an admin removes a block
+ where other blocks on an overlapping midplane are running jobs.
+ -- BLUEGENE - correctly remove running jobs when freeing a block.
+ -- BGQ - correct logic to place multiple (< 1 midplane) steps inside a
+ multi midplane block allocation.
+ -- BGQ - Make it possible for a multi midplane allocation to run on more
+ than 1 midplane but not the entire allocation.
+ -- BGL - Fix for syncing users on block from Tim Wickberg
+ -- Fix initialization of protocol_version for some messages to make sure it
+ is always set when sending or receiving a message.
+ -- Reset backfilled job counter only when explicitly cleared using scontrol.
+ Patch from Alejandro Lucero Palau, BSC.
+ -- BLUEGENE - Fix for handling blocks when a larger block will not free and
+ while it is attempting to free underlying hardware is marked in error
+ making small blocks overlapping with the freeing block. This only
+ applies to dynamic layout mode.
+ -- Cray and BlueGene - Do not treat lack of usable front-end nodes when
+ slurmctld daemon starts as a fatal error. Also preserve correct front-end
+ node for jobs when there is more than one front-end node and the slurmctld
+ daemon restarts.
+ -- Correct parsing of srun/sbatch input/output/error file names so that only
+ the name "none" is mapped to /dev/null and not any file name starting
+ with "none" (e.g. "none.o").
+ -- BGQ - added version string to the load of the runjob_mux plugin to verify
+ the current plugin has been loaded when using runjob_mux_refresh_config
+ -- CGROUPS - Use system mount/umount function calls instead of doing fork
+ exec of mount/umount from Janne Blomqvist.
+ -- BLUEGENE - correct start time setup when no jobs are blocking the way
+ from Mark Nelson
+ -- Fixed sacct --state=S query to return information about suspended jobs
+ current or in the past.
+ -- FRONTEND - Made error warning more apparent if a frontend node isn't
+ configured correctly.
+ -- BGQ - update documentation about runjob_mux_refresh_config which works
+ correctly as of IBM driver V1R1M1 efix 008.
+
+* Changes in SLURM 2.4.1
+========================
+ -- Fix bug for job state change from 2.3 -> 2.4 job state can now be preserved
+ correctly when transitioning. This also applies for 2.4.0 -> 2.4.1, no
+ state will be lost. (Thanks to Carles Fenoy)
+
+* Changes in SLURM 2.4.0
+========================
+ -- Cray - Improve support for zero compute node resource allocations.
+ Partition used can now be configured with no nodes.
+ -- BGQ - make it so srun -i<taskid> works correctly.
+ -- Fix parse_uint32/16 to complain if a non-digit is given.
+ -- Add SUBMITHOST to job state passed to Moab vial sched/wiki2. Patch by Jon
+ Bringhurst (LANL).
+ -- BGQ - Fix issue when running with AllowSubBlockAllocations=Yes without
+ compiling with --enable-debug
+ -- Modify scontrol to require "-dd" option to report batch job's script. Patch
+ from Don Albert, Bull.
+ -- Modify SchedulerParamters option to match documentation: "bf_res="
+ changed to "bf_resolution=". Patch from Rod Schultz, Bull.
+ -- Fix bug that clears job pending reason field. Patch from Don Lipari, LLNL.
+ -- In etc/init.d/slurm move check for scontrol after sourcing
+ /etc/sysconfig/slurm. Patch from Andy Wettstein, University of Chicago.
+ -- Fix in scheduling logic that can delay jobs with min/max node counts.
+ -- BGQ - fix issue where if a step uses the entire allocation and then
+ the next step in the allocation only uses part of the allocation it gets
+ the correct cnodes.
+ -- BGQ - Fix checking for IO on a block with new IBM driver V1R1M1 previous
+ function didn't always work correctly.
+ -- BGQ - Fix issue when a nodeboard goes down and you want to combine blocks
+ to make a larger small block and are running with sub-blocks.
+ -- BLUEGENE - Better logic for making small blocks around bad nodeboard/card.
+ -- BGQ - When using an old IBM driver cnodes that go into error because of
+ a job kill timeout aren't always reported to the system. This is now
+ handled by the runjob_mux plugin.
+ -- BGQ - Added information on how to setup the runjob_mux to run as SlurmUser.
+ -- Improve memory consumption on step layouts with high task count.
+ -- BGQ - quieter debug when the real time server comes back but there are
+ still messages we find when we poll but haven't given it back to the real
+ time yet.
+ -- BGQ - fix for if a request comes in smaller than the smallest block and
+ we must use a small block instead of a shared midplane block.
+ -- Fix issues on large jobs (>64k tasks) to have the correct counter type when
+ packing the step layout structure.
+ -- BGQ - fix issue where if a user was asking for tasks and ntasks-per-node
+ but not node count the node count is correctly figured out.
+ -- Move logic to always use the 1st alphanumeric node as the batch host for
+ batch jobs.
+ -- BLUEGENE - fix race condition where if a nodeboard/card goes down at the
+ same time a block is destroyed and that block just happens to be the
+ smallest overlapping block over the bad hardware.
+ -- Fix bug when querying accounting looking for a job node size.
+ -- BLUEGENE - fix possible race condition if cleaning up a block and the
+ removal of the job on the block failed.
+ -- BLUEGENE - fix issue if a cable was in an error state make it so we can
+ check if a block is still makable if the cable wasn't in error.
+ -- Put nodes names in alphabetic order in node table.
+ -- If preempted job should have a grace time and preempt mode is not cancel
+ but job is going to be canceled because it is interactive or other reason
+ it now receives the grace time.
+ -- BGQ - Modified documents to explain new plugin_flags needed in bg.properties
+ in order for the runjob_mux to run correctly.
+ -- BGQ - change linking from libslurm.o to libslurmhelper.la to avoid warning.
+
+* Changes in SLURM 2.4.0.rc1
+=============================
+ -- Improve task binding logic by making fuller use of HWLOC library,
+ especially with respect to Opteron 6000 series processors. Work contributed
+ by Komoto Masahiro.
+ -- Add new configuration parameter PriorityFlags, based upon work by
+ Carles Fenoy (Barcelona Supercomputer Center).
+ -- Modify the step completion RPC between slurmd and slurmstepd in order to
+ eliminate a possible deadlock. Based on work by Matthieu Hautreux, CEA.
+ -- Change the owner of slurmctld and slurmdbd log files to the appropriate
+ user. Without this change the files will be created by and owned by the
+ user starting the daemons (likely user root).
+ -- Reorganize the slurmstepd logic in order to better support NFS and
+ Kerberos credentials via the AUKS plugin. Work by Matthieu Hautreux, CEA.
+ -- Fix bug in allocating GRES that are associated with specific CPUs. In some
+ cases the code allocated first available GRES to job instead of allocating
+ GRES accessible to the specific CPUs allocated to the job.
+ -- spank: Add callbacks in slurmd: slurm_spank_slurmd_{init,exit}
+ and job epilog/prolog: slurm_spank_job_{prolog,epilog}
+ -- spank: Add spank_option_getopt() function to api
+ -- Change resolution of switch wait time from minutes to seconds.
+ -- Added CrpCPUMins to the output of sshare -l for those using hard limit
+ accounting. Work contributed by Mark Nelson.
+ -- Added mpi/pmi2 plugin for complete support of pmi2 including acquiring
+ additional resources for newly launched tasks. Contributed by Hongjia Cao,
+ NUDT.
+ -- BGQ - fixed issue where if a user asked for a specific node count and more
+ tasks than possible without overcommit the request would be allowed on more
+ nodes than requested.
+ -- Add support for new SchedulerParameters of bf_max_job_user, maximum number
+ of jobs to attempt backfilling per user. Work by Bjørn-Helge Mevik,
+ University of Oslo.
+ -- BLUEGENE - fixed issue where MaxNodes limit on a partition only limited
+ larger than midplane jobs.
+ -- Added cpu_run_min to the output of sshare --long. Work contributed by
+ Mark Nelson.
+ -- BGQ - allow regular users to resolve Rack-Midplane to AXYZ coords.
+ -- Add sinfo output format option of "%R" for partition name without "*"
+ appended for default partition.
+ -- Cray - Add support for zero compute node resource allocation to run batch
+ script on front-end node with no ALPS reservation. Useful for pre- or post-
+ processing.
+ -- Support for cyclic distribution of cpus in task/cgroup plugin from Martin
+ Perry, Bull.
+ -- GrpMEM limit for QOSes and associations added Patch from Bjørn-Helge Mevik,
+ University of Oslo.
+ -- Various performance improvements for up to 500% higher throughput depending
+ upon configuration. Work supported by the Oak Ridge National Laboratory
+ Extreme Scale Systems Center.
+ -- Added jobacct_gather/cgroup plugin. It is not advised to use this in
+ production as it isn't currently complete and doesn't provide an equivalent
+ substitution for jobacct_gather/linux yet. Work by Martin Perry, Bull.
+
+* Changes in SLURM 2.4.0.pre4
+=============================
+ -- Add logic to cache GPU file information (bitmap index mapping to device
+ file number) in the slurmd daemon and transfer that information to the
+ slurmstepd whenever a job step is initiated. This is needed to set the
+ appropriate CUDA_VISIBLE_DEVICES environment variable value when the
+ devices are not in strict numeric order (e.g. some GPUs are skipped).
+ Based upon work by Nicolas Bigaouette.
+ -- BGQ - Remove ability to make a sub-block with a geometry with one or more
+ of its dimensions of length 3. There is a limitation in the IBM I/O
+ subsystem that is problematic with multiple sub-blocks with a dimension
+ of length 3, so we will disallow them to be able to be created. This
+ means that if you ask the system for an allocation of 12 c-nodes you will
+ be given 16. If this is ever fixed in BGQ you can remove this patch.
+ -- BLUEGENE - Better handling blocks that go into error state or deallocate
+ while jobs are running on them.
+ -- BGQ - fix for handling mix of steps running at same time some of which
+ are full allocation jobs, and others that are smaller.
+ -- BGQ - fix for core dump after running multiple sub-block jobs on static
+ blocks.
+ -- BGQ - fixed sync issue where if a job finishes in SLURM but not in mmcs
+ for a long time after the SLURM job has been flushed from the system
+ we don't have to worry about rebooting the block to sync the system.
+ -- BGQ - In scontrol/sview node counts are now displayed with
+ CnodeCount/CnodeErrCount so to point out there are cnodes in an error state
+ on the block. Draining the block and having it reboot when all jobs are
+ gone will clear up the cnodes in Software Failure.
+ -- Change default SchedulerParameters max_switch_wait field value from 60 to
+ 300 seconds.
+ -- BGQ - catch errors from the kill option of the runjob client.
+ -- BLUEGENE - make it so the epilog runs until slurmctld tells it the job is
+ gone. Previously it had a timelimit which has proven to not be the right
+ thing.
+ -- FRONTEND - fix issue where if a compute node was in a down state and
+ an admin updates the node to idle/resume the compute nodes will go
+ instantly to idle instead of idle* which means no response.
+ -- Fix regression in 2.4.0.pre3 where number of submitted jobs limit wasn't
+ being honored for QOS.
+ -- Cray - Enable logging of BASIL communications with environment variables.
+ Set XML_LOG to enable logging. Set XML_LOG_LOC to specify path to log file
+ or "SLURM" to write to SlurmctldLogFile or unset for "slurm_basil_xml.log".
+ Patch from Steve Trofinoff, CSCS.
+ -- FRONTEND - if a front end unexpectedly reboots kill all jobs but don't
+ mark front end node down.
+ -- FRONTEND - don't down a front end node if you have an epilog error
+ -- BLUEGENE - if a job has an epilog error don't down the midplane it was
+ running on.
+ -- BGQ - added new DebugFlag (NoRealTime) for only printing debug from
+ state change while the realtime server is running.
+ -- Fix multi-cluster mode with sview starting on a non-bluegene cluster going
+ to a bluegene cluster.
+ -- BLUEGENE - ability to show Rack Midplane name of midplanes in sview and
+ scontrol.
+
+* Changes in SLURM 2.4.0.pre3
+=============================
+ -- Let a job be submitted even if it exceeds a QOS limit. Job will be left
+ in a pending state until the QOS limit or job parameters change. Patch by
+ Phil Eckert, LLNL.
+ -- Add sacct support for the option "--name". Work by Yuri D'Elia, Center for
+ Biomedicine, EURAC Research, Italy.
+ -- BGQ - handle preemption.
+ -- Add an srun shepherd process to cancel a job and/or step if the srun process
+ is killed abnormally (e.g. SIGKILL).
+ -- BGQ - handle deadlock issue when a nodeboard goes into an error state.
+ -- BGQ - more thorough handling of blocks with multiple jobs running on them.
+ -- Fix man2html process to compile in the build directory instead of the
+ source dir.
+ -- Behavior of srun --multi-prog modified so that any program arguments
+ specified on the command line will be appended to the program arguments
+ specified in the program configuration file.
+ -- Add new command, sdiag, which reports a variety of job scheduling
+ statistics. Based upon work by Alejandro Lucero Palau, BSC.
+ -- BLUEGENE - Added DefaultConnType to the bluegene.conf file. This makes it
+ so you can specify any connection type you would like (TORUS or MESH) as
+ the default in dynamic mode. Previously it always defaulted to TORUS.
+ -- Made squeue -n and -w options more consistent with salloc, sbatch, srun,
+ and scancel. Patch by Don Lipari, LLNL.
+ -- Have sacctmgr remove user records when no associations exist for that user.
+ -- Several header file changes for clean build with NetBSD. Patches from
+ Aleksej Saushev.
+ -- Fix for possible deadlock in accounting logic: Avoid calling
+ jobacct_gather_g_getinfo() until there is data to read from the socket.
+ -- Fix race condition that could generate "job_cnt_comp underflow" errors on
+ front-end architectures.
+ -- BGQ - Fix issue where a system with missing cables could cause core dump.
+
+* Changes in SLURM 2.4.0.pre2
+=============================
+ -- CRAY - Add support for GPU memory allocation using SLURM GRES (Generic
+ RESource) support. Work by Steve Trofinoff, CSCS.
+ -- Add support for job allocations with multiple job constraint counts. For
+ example: salloc -C "[rack1*2&rack2*4]" ... will allocate the job 2 nodes
+ from rack1 and 4 nodes from rack2. Support for only a single constraint
+ name has been added to job step support.
+ -- BGQ - Remove old method for marking cnodes down.
+ -- BGQ - Remove BGP images from view in sview.
+ -- BGQ - print out failed cnodes in scontrol show nodes.
+ -- BGQ - Add srun option of "--runjob-opts" to pass options to the runjob
+ command.
+ -- FRONTEND - handle step launch failure better.
+ -- BGQ - Added a mutex to protect the now changing ba_system pointers.
+ -- BGQ - added new functionality for sub-block allocations - no preemption
+ for this yet though.
+ -- Add --name option to squeue to filter output by job name. Patch from Yuri
+ D'Elia.
+ -- BGQ - Added linking to runjob client library which gives support to totalview
+ to use srun instead of runjob.
+ -- Add numeric range checks to scontrol update options. Patch from Phil
+ Eckert, LLNL.
+ -- Add ReconfigFlags configuration option to control actions of "scontrol
+ reconfig". Patch from Don Albert, Bull.
+ -- BGQ - handle reboots with multiple jobs running on a block.
+ -- BGQ - Add message handler thread to forward signals to runjob process.
+
+* Changes in SLURM 2.4.0.pre1
+=============================
+ -- BGQ - use the ba_geo_tables to figure out the blocks instead of the old
+ algorithm. This improves timing in the worst cases and simplifies the code
+ greatly.
+ -- BLUEGENE - Change to output tools labels from BP to Midplane
+ (i.e. BP List -> MidplaneList).
+ -- BLUEGENE - read MPs and BPs from the bluegene.conf
+ -- Modify srun's SIGINT handling logic timer (two SIGINTs within one second) to
+ be based on a microsecond rather than a second timer.
+ -- Modify advance reservation to accept multiple specific block sizes rather
+ than a single node count.
+ -- Permit administrator to change a job's QOS to any value without validating
+ the job's owner has permission to use that QOS. Based upon patch by Phil
+ Eckert (LLNL).
+ -- Add trigger flag for a permanent trigger. The trigger will NOT be purged
+ after an event occurs, but only when explicitly deleted.
+ -- Interpret a reservation with Nodes=ALL and a Partition specification as
+ reserving all nodes within the specified partition rather than all nodes
+ on the system. Based upon patch by Phil Eckert (LLNL).
+ -- Add the ability to reboot all compute nodes after they become idle. The
+ RebootProgram configuration parameter must be set and an authorized user
+ must execute the command "scontrol reboot_nodes". Patch from Andriy
+ Grytsenko (Massive Solutions Limited).
+ -- Modify slurmdbd.conf parsing to accept DebugLevel strings (quiet, fatal,
+ info, etc.) in addition to numeric values. The parsing of slurm.conf was
+ modified in the same fashion for SlurmctldDebug and SlurmdDebug values.
+ The output of sview and "scontrol show config" was also modified to report
+ those values as strings rather than numeric values.
+ -- Changed default value of StateSaveLocation configuration parameter from
+ /tmp to /var/spool.
+ -- Prevent associations from being deleted if they have any jobs in running,
+ pending or suspended state. Previous code prevented this only for running
+ jobs.
+ -- If a job can not run due to QOS or association limits, then do not cancel
+ the job, but leave it pending in a system held state (priority = 1). The
+ job will run when its limits or the QOS/association limits change. Based
+ upon a patch by Phil Eckert (LLNL).
+ -- BGQ - Added logic to keep track of cnodes in an error state inside of a
+ booted block.
+ -- Added the ability to update a node's NodeAddr and NodeHostName with
+ scontrol. Also enable setting a node's state to "future" using scontrol.
+ -- Add a node state flag of CLOUD and save/restore NodeAddr and NodeHostName
+ information for nodes with a flag of CLOUD.
+ -- Cray: Add support for job reservations with node IDs that are not in
+ numeric order. Fix for Bugzilla #5.
+ -- BGQ - Fix issue with smap -R
+ -- Fix association limit support for jobs queued for multiple partitions.
+ -- BLUEGENE - fix issue for sub-midplane systems to create a full system
+ block correctly.
+ -- BLUEGENE - Added option to the bluegene.conf to tell you are running on
+ a sub midplane system.
+ -- Added the UserID of the user issuing the RPC to the job_submit/lua
+ functions.
+ -- Fixed issue where if a job ended with ESLURMD_UID_NOT_FOUND or
+ ESLURMD_GID_NOT_FOUND, slurm would be a little overzealous in
+ treating a missing GID or UID as a fatal error.
+ -- If job time limit exceeds partition maximum, but job's minimum time limit
+ does not, set job's time limit to partition maximum at allocation time.
+
* Changes in SLURM 2.3.6
========================
-- Fix DefMemPerCPU for partition definitions.
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index e3e4cfa..3587218 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,10 +1,10 @@
-RELEASE NOTES FOR SLURM VERSION 2.3
-28 July 2011
+RELEASE NOTES FOR SLURM VERSION 2.4
+25 May 2012
IMPORTANT NOTE:
If using the slurmdbd (SLURM DataBase Daemon) you must update this first.
-The 2.3 slurmdbd will work with SLURM daemons of version 2.1.3 and above.
+The 2.4 slurmdbd will work with SLURM daemons of version 2.1.3 and above.
You will not need to update all clusters at the same time, but it is very
important to update slurmdbd first and having it running before updating
any other clusters making use of it. No real harm will come from updating
@@ -18,202 +18,104 @@
under the [mysqld] reference in the my.cnf file and restarting the mysqld.
This is needed when converting large tables over to the new database schema.
-SLURM can be upgraded from version 2.2 to version 2.3 without loss of jobs or
+SLURM can be upgraded from version 2.3 to version 2.4 without loss of jobs or
other state information.
HIGHLIGHTS
==========
-* Support has been added for Cray XT and XE computers
-* Support has been added for BlueGene/Q computers.
-* For architectures where the slurmd daemon executes on front end nodes (Cray
- and BlueGene systems) more than one slurmd daemon may be executed using more
- than one front end node for improved fault-tolerance and performance.
- NOTE: The slurmctld daemon will report the lack of a front_end_state file
- as an error when first started in this configuration.
-* The ability to expand running jobs was added
-* The ability to control how many leaf switches a job is allocated and the
- maximum delay to get that leaf switch count can be controlled.
+* Up to 500% higher throughput for short-lived jobs (depending upon
+ configuration).
+* Major modifications to support IBM BlueGene/Q systems.
+* New SPANK callbacks added to slurmd: slurm_spank_slurmd_{init,exit} and
+ job epilog/prolog: slurm_spank_job_{prolog,epilog}.
+* Added MPI plugin, mpi/pmi2, which supports MPI_Comm_spawn() function.
CONFIGURATION FILE CHANGES (see "man slurm.conf" for details)
=============================================================
-* In order to support more than one front end node, new parameters have been
- added to support a new data structure: FrontendName, FrontendAddr, Port,
- State and Reason.
-* Added DebugFlags option of Frontend
-* Added new configuration parameter MaxJobId. Use with FirstJobId to limit
- range of job ID values.
-* Added new configuration parameter MaxStepCount to limit the effect of
- bad batch scripts. The default value is 40,000 steps per job.
-* Changed node configuration parameter from "Procs" to "CPUs". Both parameters
- will be supported for now.
-* Added GraceTime to Partition and QOS data structures. Preempted jobs will be
- given this time interval before termination.
-* Added AccountingStoreJobComment to control storing job's comment field in
- the accounting database.
-* More than one TaskPlugin can be configured in a comma separated list.
-* DefMemPerCPU, DefMemPerNode, MaxMemPerCPU and MaxMemPerNode configuration
- options added on a per-partition basis.
-* SchedulerParameters can now control the maximum delay that a job can set in
- order to be allocated some desired leaf switch count by specifying a value
- for max_switch_wait.
+* "PriorityFlags" added
+* "RebootProgram" added
+* "ReconfigFlags" added
+* "SlurmdDebug" and "SlurmctldDebug" now accept string names in
+ addition to numeric values (e.g. "info", "verbose", "debug", etc.). Output
+ of scontrol and sview commands also use the string names.
+* Changed default value of "StateSaveLocation" configuration parameter from
+ "/tmp" to "/var/spool" to help avoid purging.
+* Change default "SchedulerParameters" "max_switch_wait" field value from 60 to
+ 300 seconds.
+* Added new "SchedulerParameters" of "bf_max_job_user", maximum number of jobs
+ to attempt backfilling per user.
+* Modify SchedulerParameters option to match documentation: "bf_res="
+ changed to "bf_resolution=".
COMMAND CHANGES (see man pages for details)
===========================================
-* Added scontrol ability to get and set front end node state.
-* Added scontrol ability to set slurmctld's DebugFlags.
-* Added scontrol ability to increment or decrement a job or step time limit.
-* Added new scontrol option of "show aliases" to report every NodeName that is
- associated with a given NodeHostName when running multiple slurmd daemons
- per compute node (typically used for testing purposes).
-* Added new squeue optioni of -R/--reservation option as a job filter.
-* A reservation flag of "License_Only" has been added for use by the sview and
- scontrol commands. If set, then jobs using the reservation may use the
- licenses associated with it plus any compute nodes. Otherwise the job is
- limited to the compute nodes associated with the reservation.
-* The dependency option of "expand" has been added. This option identifies a
- job whose resource allocation is intended to be used to expand the allocation
- of another job. See http://www.schedmd.com/slurmdocs//faq.html#job_size
- for a description of it's use.
-* Added --switches option to salloc, sbatch and srun commands to control the
- desired number of switches allocated to a job and the maximum delay before
- starting the job with more leaf switches.
-* Added scontrol ability to modify a job's desired switch count or delay.
+* Modified advance reservation to select resources optimized for network
+ topology and accept multiple specific block sizes rather than a single node
+ count.
+* Added trigger flag for a permanent trigger. The trigger will NOT be purged
+ after an event occurs, but only when explicitly deleted.
+* Added the ability to reboot all compute nodes after they become idle. The
+ RebootProgram configuration parameter must be set and an authorized user
+ must execute the command "scontrol reboot_nodes".
+* Added the ability to update a node's NodeAddr and NodeHostName with scontrol.
+* Added the option "--name" to the sacct and squeue commands.
+* Add support for job allocations with multiple job constraint counts. For
+ example: salloc -C "[rack1*2&rack2*4]" ... will allocate the job 2 nodes
+ from rack1 and 4 nodes from rack2. Support for only a single constraint
+ name has been added to job step support.
+* Changed meaning of squeue "-n" option to job name from node name for
+ consistency with other commands. The "-w" option was added for a short
+ node name option. Long options --names and --nodes remain unchanged.
+* Sinfo output format of "%P" now prints "*" after default partition even if
+ no field width is specified (previously included "*" only if no field width
+ was specified). Added output format of "%R" to print partition name only
+ without identifying the default partition with "*".
+* Added cpu_run_min to the output of sshare --long.
+* Modify scontrol to require "-dd" option to report batch job's script.
-BLUEGENE SPECIFIC CHANGES
-=========================
-* Bluegene/Q support added.
-* The select/bluegene plugin has been substantially re-written.
OTHER CHANGES
=============
-* Improved accuracy of estimated job start time for pending jobs. This should
- substantially improve scheduling of jobs elibable to execute on more than one
- cluster.
-* Job dependency information will only show the currently active dependencies
- rather than the original dependencies.
-* Added a reservation flag of "License_Only". If set, then jobs using the
- reservation may use the licenses associated with it plus any compute nodes.
-* Added proctrack/cgroup and task/cgroup plugins to support Linux cgroups.
+* Improve task binding logic by making fuller use of HWLOC library,
+ especially with respect to Opteron 6000 series processors.
+* Changed to output tools labels from "BP" to "Midplane" (i.e. "BP_List" was
+ changed to "MidplaneList").
+* Modified srun to fork a process which can terminate the job and/or step
+ allocation if the initial srun process is abnormally terminated (e.g. by
+ SIGKILL).
+* Added support for Cray GPU memory allocation as GRES (Generic RESources).
+* Correct setting of CUDA_VISIBLE_DEVICES for gres/gpu plugin if device files
+ to be used are not in numeric order (e.g. GPU 1 maps to "/dev/nvidia4").
+* Cray - Add support for zero compute node resource allocation to run batch
+ script on front-end node with no ALPS reservation. Useful for pre- or post-
+ processing. NOTE: The partition must be configured with MinNodes=0.
API CHANGES
===========
+* Added the UserID of the user issuing the RPC to the job_submit/lua functions.
Changed members of the following structs
========================================
-block_info_t
- Added job_list
- Added used_mp_inx
- Added used_mp_str
- bp_inx -> mp_inx
- conn_type -> conn_type(DIMENSIONS]
- ionodes -> ionode_str
- nodes -> mp_str
- node_cnt -> cnode_cnt
-job_desc_msg_t
- conn_type -> conn_type(DIMENSIONS]
-
-job_step_info_t
- Added select_jobinfo
-
-partition_info_t
- Added def_mem_per_cpu and max_mem_per_cpu
Added the following struct definitions
======================================
-block_job_info_t entirely new structure
-
-front_end_info_msg_t entirely new structure
-
-front_end_info_t entirely new structure
-
-job_info_t
- batch_host name of the host running the batch script
- batch_script contents of batch script
- preempt_time time that a job become preempted
- req_switches maximum number of leaf switches
- wait4switches maximum delay to get desired leaf switch count
-
-job_step_create_response_msg_t
- select_jobinfo data needed from the select plugin for a step
-
-job_step_info_t
- select_jobinfo data needed from the select plugin for a step
-
-node_info_t
- node_addr communication name (optional)
- node_hostname node's hostname (optional)
-
-partition_info_t
- grace_time preempted job's grace time in seconds
-
-slurm_ctl_conf
- acctng_store_job_comment if set, store job's comment field in
- accounting database
- max_job_id maximum supported job id before starting over
- with first_job_id
- max_step_count maximum number of job steps permitted per job
-
-slurm_step_layout
- front_end name of front end host running the step
-
-slurmdb_qos_rec_t
- grace_time preempted job's grace time in seconds
-
-update_front_end_msg_t entirely new structure
-
+block_info_t: cnode_err_cnt added
+slurm_ctl_conf_t priority_flags, reboot_program and reconfig_flags added
+trigger_info_t: flags added
+update_node_msg_t: node_addr and node_hostname added
+slurmdb_association_cond_t: grp_mem_list added
+slurmdb_association_rec_t: grp_mem added
+slurmdb_qos_rec_t: grp_mem added
Changed the following enums and #defines
========================================
-job_state_reason
- FAIL_BANK_ACCOUNT -> FAIL_ACCOUNT
- FAIL_QOS /* invalid QOS */
- WAIT_QOS_THRES /* required QOS threshold has been breached */
-
-select_jobdata_type (Size of many data structures increased)
- SELECT_JOBDATA_BLOCK_NODE_CNT /* data-> uint32_t block_cnode_cnt */
- SELECT_JOBDATA_BLOCK_PTR /* data-> bg_record_t *bg_record */
- SELECT_JOBDATA_DIM_CNT /* data-> uint16_t dim_cnt */
- SELECT_JOBDATA_NODE_CNT /* data-> uint32_t cnode_cnt */
- SELECT_JOBDATA_PAGG_ID /* data-> uint64_t job container ID */
- SELECT_JOBDATA_PTR /* data-> select_jobinfo_t *jobinfo */
- SELECT_JOBDATA_START_LOC /* data-> uint16_t
- * start_loc[SYSTEM_DIMENSIONS] */
-select_jobdata_type (Added)
- SELECT_PRINT_START_LOC /* Print just the start location */
-select_jobdata_type (Names changed)
- SELECT_GET_BP_CPU_CNT --> SELECT_GET_MP_CPU_CNT
- SELECT_SET_BP_CNT ------>SELECT_SET_MP_CNT
-
-select_nodedata_type
- SELECT_NODEDATA_PTR /* data-> select_nodeinfo_t *nodeinfo */
-
-select_print_mode
- SELECT_PRINT_START_LOC /* Print just the start location */
-
-select_type_plugin_info no longer exists. It's contents are now mostly #defines
-
-DEBUG_FLAG_FRONT_END added DebugFlags of Frontend
-
-JOB_PREEMPTED added new job termination state to indicated
- job termination was due to preemption
-
-RESERVE_FLAG_LIC_ONLY reserve licenses only, use any nodes
-
-TRIGGER_RES_TYPE_FRONT_END added trigger for frontend state changes
+TRIGGER_FLAG_PERM Added
Added the following API's
=========================
-slurm_free_front_end_info_msg free front end state information
-slurm_init_update_front_end_msg initialize data structure for front end update
-slurm_load_front_end load front end state information
-slurm_print_front_end_info_msg print all front end state information
-slurm_print_front_end_table print state information for one front end node
-slurm_set_debugflags set new DebugFlags in slurmctld daemon
-slurm_sprint_front_end_table output state information for one front end node
-slurm_update_front_end update state of front end node
Changed the following API's
diff --git a/RELEASE_NOTES_LLNL b/RELEASE_NOTES_LLNL
deleted file mode 100644
index de8ffaf..0000000
--- a/RELEASE_NOTES_LLNL
+++ /dev/null
@@ -1,10 +0,0 @@
-LLNL CHAOS-SPECIFIC RELEASE NOTES FOR SLURM VERSION 2.3
-3 January 2011
-
-This lists only the most significant changes from SLURM v2.2 to v2.3
-with respect to Chaos systems. See the file RELEASE_NOTES for a more
-complete description of changes.
-
-Mostly for system administrators:
-
-Mostly for users:
diff --git a/Support b/Support
deleted file mode 100644
index d10cfe2..0000000
--- a/Support
+++ /dev/null
@@ -1,77 +0,0 @@
-Here is a list of the major SLURM components and who has
-expertise in those areas. Anyone listed as a "Primary"
-has extensive knowledge of the component and is typically
-the author. Anyone listed as a "Backup" has a working
-knowledge of the component and should be able to address
-problems. Where I list the primary in parenthesis, that
-indicates the person is actively developing the expertise
-to serve as the primary contact for that component.
-
-Command/Daemon Primary Backup Notes
-============== ======= ======= =====
-sacct DA very active
-sacctmgr DA very active
-salloc (DL) DA,MJ stable
-sattach (DL) DA,MJ stable
-sbatch (DL) DA,MJ stable
-sbcast MJ very stable
-scancel DA,MJ very stable
-scontrol MJ,DA stable
-sinfo MJ,DA stable
-slurmctld MJ DA
-slurmd (DL) DA,MJ
-slurmstepd (DL) DA,MJ
-slurmdbd DA very active
-smap DA MJ
-sprio DL
-squeue MJ DA stable
-sreport DA very active
-srun (DL) DA,MJ stable
-sshare DA
-sstat DA
-strigger MJ very stable
-sview DA MJ
-
-Plugins Primary Backup Notes
-============== ======= ======= =====
-acct_storage DA MJ very active
-auth MJ DA very stable
-checkpoint MJ,NUDT
-crypto MJ DA very stable
-jobacct_gather DA MJ stable
-jobcomp MJ DA very stable
-mpi MJ active for OpenMPI
-preempt MJ new
-priority DL DA
-proctrack MJ DA very stable
-sched MJ DA
-select/bluegene DA MJ
-select/cons_res MJ HP,DA
-select/linear MJ DA stable
-switch MJ DA very stable
-task DL MG,HP
-topology MJ
-
-Other Primary Backup Notes
-============== ======= ======= =====
-api DA,MJ
-common DA,MJ
-database DA DL very active
-pam MG very stable
-perlapi DA stable
-phpext DA stable
-python NSC stable
-sjstat PE stable
-slurmdb_direct DA
-testsuite DA,MJ
-torque DA stable
-
-
-DA Danny Auble
-DL Don Lipari
-HP HP
-MG Mark Grondona
-MJ Moe Jette
-NSC National Supercomputer Center (Sweden)
-NUDT National University of Defense Technology (China)
-PE Phil Eckert
diff --git a/aclocal.m4 b/aclocal.m4
index d1afa71..9c3d50c 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1,7 +1,8 @@
-# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.11.3 -*- Autoconf -*-
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+# 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+# Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -376,12 +377,15 @@
fi[]dnl
])# PKG_CHECK_MODULES
-# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008, 2011 Free Software
+# Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# AM_AUTOMAKE_VERSION(VERSION)
# ----------------------------
# Automake X.Y traces this macro to ensure aclocal.m4 has been
@@ -391,7 +395,7 @@
[am__api_version='1.11'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
-m4_if([$1], [1.11.1], [],
+m4_if([$1], [1.11.3], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
@@ -407,19 +411,21 @@
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.11.1])dnl
+[AM_AUTOMAKE_VERSION([1.11.3])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
-# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
@@ -501,14 +507,14 @@
Usually this means the macro was only invoked conditionally.]])
fi])])
-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009
-# Free Software Foundation, Inc.
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009,
+# 2010, 2011 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
-# serial 10
+# serial 12
# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
# written in clear, in which case automake, when reading aclocal.m4,
@@ -548,6 +554,7 @@
# instance it was reported that on HP-UX the gcc test will end up
# making a dummy file named `D' -- because `-MD' means `put the output
# in D'.
+ rm -rf conftest.dir
mkdir conftest.dir
# Copy depcomp to subdir because otherwise we won't find it if we're
# using a relative directory.
@@ -612,7 +619,7 @@
break
fi
;;
- msvisualcpp | msvcmsys)
+ msvc7 | msvc7msys | msvisualcpp | msvcmsys)
# This compiler won't grok `-c -o', but also, the minuso test has
# not run yet. These depmodes are late enough in the game, and
# so weak that their functioning should not be impacted.
@@ -677,10 +684,13 @@
if test "x$enable_dependency_tracking" != xno; then
am_depcomp="$ac_aux_dir/depcomp"
AMDEPBACKSLASH='\'
+ am__nodep='_no'
fi
AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
AC_SUBST([AMDEPBACKSLASH])dnl
_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
+AC_SUBST([am__nodep])dnl
+_AM_SUBST_NOTMAKE([am__nodep])dnl
])
# Generate code to set up dependency tracking. -*- Autoconf -*-
@@ -902,12 +912,15 @@
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
-# Copyright (C) 2001, 2003, 2005, 2008 Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2008, 2011 Free Software Foundation,
+# Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# AM_PROG_INSTALL_SH
# ------------------
# Define $install_sh.
@@ -947,8 +960,8 @@
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
# From Jim Meyering
-# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008
-# Free Software Foundation, Inc.
+# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008,
+# 2011 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -968,7 +981,7 @@
[disable], [m4_define([am_maintainer_other], [enable])],
[m4_define([am_maintainer_other], [enable])
m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])])
-AC_MSG_CHECKING([whether to am_maintainer_other maintainer-specific portions of Makefiles])
+AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
dnl maintainer-mode's default is 'disable' unless 'enable' is passed
AC_ARG_ENABLE([maintainer-mode],
[ --][am_maintainer_other][-maintainer-mode am_maintainer_other make rules and dependencies not useful
@@ -1079,12 +1092,15 @@
fi
])
-# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+# Copyright (C) 2003, 2004, 2005, 2006, 2011 Free Software Foundation,
+# Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# AM_PROG_MKDIR_P
# ---------------
# Check for `mkdir -p'.
@@ -1107,13 +1123,14 @@
# Helper functions for option handling. -*- Autoconf -*-
-# Copyright (C) 2001, 2002, 2003, 2005, 2008 Free Software Foundation, Inc.
+# Copyright (C) 2001, 2002, 2003, 2005, 2008, 2010 Free Software
+# Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
-# serial 4
+# serial 5
# _AM_MANGLE_OPTION(NAME)
# -----------------------
@@ -1121,13 +1138,13 @@
[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
# _AM_SET_OPTION(NAME)
-# ------------------------------
+# --------------------
# Set option NAME. Presently that only means defining a flag for this option.
AC_DEFUN([_AM_SET_OPTION],
[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
# _AM_SET_OPTIONS(OPTIONS)
-# ----------------------------------
+# ------------------------
# OPTIONS is a space-separated list of Automake options.
AC_DEFUN([_AM_SET_OPTIONS],
[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
@@ -1203,12 +1220,14 @@
fi
AC_MSG_RESULT(yes)])
-# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# AM_PROG_INSTALL_STRIP
# ---------------------
# One issue with vendor `install' (even GNU) is that you can't
@@ -1231,13 +1250,13 @@
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
-# Copyright (C) 2006, 2008 Free Software Foundation, Inc.
+# Copyright (C) 2006, 2008, 2010 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
-# serial 2
+# serial 3
# _AM_SUBST_NOTMAKE(VARIABLE)
# ---------------------------
@@ -1246,13 +1265,13 @@
AC_DEFUN([_AM_SUBST_NOTMAKE])
# AM_SUBST_NOTMAKE(VARIABLE)
-# ---------------------------
+# --------------------------
# Public sister of _AM_SUBST_NOTMAKE.
AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
-# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 2004, 2005, 2012 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1274,10 +1293,11 @@
# a tarball read from stdin.
# $(am__untar) < result.tar
AC_DEFUN([_AM_PROG_TAR],
-[# Always define AMTAR for backward compatibility.
-AM_MISSING_PROG([AMTAR], [tar])
+[# Always define AMTAR for backward compatibility. Yes, it's still used
+# in the wild :-( We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
m4_if([$1], [v7],
- [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
+ [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
[m4_case([$1], [ustar],, [pax],,
[m4_fatal([Unknown tar format])])
AC_MSG_CHECKING([how to create a $1 tar archive])
diff --git a/autogen.sh b/autogen.sh
index 94255d6..0587917 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -111,4 +111,4 @@
echo
echo "NOTE: This script has most likely just modified files that are under"
echo " version control. Make sure that you really want these changes"
-echo " applied to the repository before you run \"svn commit\"."
+echo " applied to the repository before you run \"git commit\"."
diff --git a/auxdir/Makefile.in b/auxdir/Makefile.in
index e4040ff..8e7f169 100644
--- a/auxdir/Makefile.in
+++ b/auxdir/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -162,6 +162,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -198,6 +199,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -410,10 +412,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/auxdir/acx_pthread.m4 b/auxdir/acx_pthread.m4
index ed9885e..22ff8c5 100644
--- a/auxdir/acx_pthread.m4
+++ b/auxdir/acx_pthread.m4
@@ -144,7 +144,7 @@
# functions on Solaris that doesn't have a non-functional libc stub.
# We try pthread_create on general principles.
AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]], [[pthread_t th; pthread_join(th, 0);
- pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0); pthread_atfork(NULL, NULL, NULL);
pthread_create(0,0,0,0); pthread_cleanup_pop(0); ]])],[acx_pthread_ok=yes],[])
LIBS="$save_LIBS"
diff --git a/auxdir/config.guess b/auxdir/config.guess
index 40eaed4..d622a44 100755
--- a/auxdir/config.guess
+++ b/auxdir/config.guess
@@ -2,9 +2,9 @@
# Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-# 2011 Free Software Foundation, Inc.
+# 2011, 2012 Free Software Foundation, Inc.
-timestamp='2011-05-11'
+timestamp='2012-02-10'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -17,9 +17,7 @@
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -57,8 +55,8 @@
Originally written by Per Bothner.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
-Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -145,7 +143,7 @@
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
*:NetBSD:*:*)
# NetBSD (nbsd) targets should (where applicable) match one or
- # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+ # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
# *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
# switched to ELF, *-*-netbsd* would select the old
# object file format. This provides both forward
@@ -792,13 +790,12 @@
echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
exit ;;
*:FreeBSD:*:*)
- case ${UNAME_MACHINE} in
- pc98)
- echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+ case ${UNAME_PROCESSOR} in
amd64)
echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
*)
- echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
esac
exit ;;
i*:CYGWIN*:*)
@@ -807,6 +804,9 @@
*:MINGW*:*)
echo ${UNAME_MACHINE}-pc-mingw32
exit ;;
+ i*:MSYS*:*)
+ echo ${UNAME_MACHINE}-pc-msys
+ exit ;;
i*:windows32*:*)
# uname -m includes "-pc" on this system.
echo ${UNAME_MACHINE}-mingw32
@@ -861,6 +861,13 @@
i*86:Minix:*:*)
echo ${UNAME_MACHINE}-pc-minix
exit ;;
+ aarch64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ aarch64_be:Linux:*:*)
+ UNAME_MACHINE=aarch64_be
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
alpha:Linux:*:*)
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
EV5) UNAME_MACHINE=alphaev5 ;;
@@ -895,13 +902,16 @@
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
cris:Linux:*:*)
- echo cris-axis-linux-gnu
+ echo ${UNAME_MACHINE}-axis-linux-gnu
exit ;;
crisv32:Linux:*:*)
- echo crisv32-axis-linux-gnu
+ echo ${UNAME_MACHINE}-axis-linux-gnu
exit ;;
frv:Linux:*:*)
- echo frv-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ hexagon:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
i*86:Linux:*:*)
LIBC=gnu
@@ -943,7 +953,7 @@
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
;;
or32:Linux:*:*)
- echo or32-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
padre:Linux:*:*)
echo sparc-unknown-linux-gnu
@@ -978,13 +988,13 @@
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
tile*:Linux:*:*)
- echo ${UNAME_MACHINE}-tilera-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
vax:Linux:*:*)
echo ${UNAME_MACHINE}-dec-linux-gnu
exit ;;
x86_64:Linux:*:*)
- echo x86_64-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
xtensa*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -1315,6 +1325,9 @@
i*86:AROS:*:*)
echo ${UNAME_MACHINE}-pc-aros
exit ;;
+ x86_64:VMkernel:*:*)
+ echo ${UNAME_MACHINE}-unknown-esx
+ exit ;;
esac
#echo '(No uname command or uname output not recognized.)' 1>&2
diff --git a/auxdir/config.sub b/auxdir/config.sub
index 30fdca8..c894da4 100755
--- a/auxdir/config.sub
+++ b/auxdir/config.sub
@@ -2,9 +2,9 @@
# Configuration validation subroutine script.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-# 2011 Free Software Foundation, Inc.
+# 2011, 2012 Free Software Foundation, Inc.
-timestamp='2011-03-23'
+timestamp='2012-02-10'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
@@ -21,9 +21,7 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -76,8 +74,8 @@
GNU config.sub ($timestamp)
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
-Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -132,6 +130,10 @@
os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
;;
+ android-linux)
+ os=-linux-android
+ basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+ ;;
*)
basic_machine=`echo $1 | sed 's/-[^-]*$//'`
if [ $basic_machine != $1 ]
@@ -247,17 +249,22 @@
# Some are omitted here because they have special meanings below.
1750a | 580 \
| a29k \
+ | aarch64 | aarch64_be \
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+ | be32 | be64 \
| bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
+ | epiphany \
| fido | fr30 | frv \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+ | hexagon \
| i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \
+ | le32 | le64 \
| lm32 \
| m32c | m32r | m32rle | m68000 | m68k | m88k \
| maxq | mb | microblaze | mcore | mep | metag \
@@ -291,7 +298,7 @@
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle \
| pyramid \
- | rx \
+ | rl78 | rx \
| score \
| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
| sh64 | sh64le \
@@ -300,7 +307,7 @@
| spu \
| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
| ubicom32 \
- | v850 | v850e \
+ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
| we32k \
| x86 | xc16x | xstormy16 | xtensa \
| z8k | z80)
@@ -315,8 +322,7 @@
c6x)
basic_machine=tic6x-unknown
;;
- m6811 | m68hc11 | m6812 | m68hc12 | picochip)
- # Motorola 68HC11/12.
+ m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
basic_machine=$basic_machine-unknown
os=-none
;;
@@ -329,7 +335,10 @@
strongarm | thumb | xscale)
basic_machine=arm-unknown
;;
-
+ xgate)
+ basic_machine=$basic_machine-unknown
+ os=-none
+ ;;
xscaleeb)
basic_machine=armeb-unknown
;;
@@ -352,11 +361,13 @@
# Recognize the basic CPU types with company name.
580-* \
| a29k-* \
+ | aarch64-* | aarch64_be-* \
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
| avr-* | avr32-* \
+ | be32-* | be64-* \
| bfin-* | bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* \
| clipper-* | craynv-* | cydra-* \
@@ -365,8 +376,10 @@
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
| h8300-* | h8500-* \
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+ | hexagon-* \
| i*86-* | i860-* | i960-* | ia64-* \
| ip2k-* | iq2000-* \
+ | le32-* | le64-* \
| lm32-* \
| m32c-* | m32r-* | m32rle-* \
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
@@ -400,7 +413,7 @@
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
| pyramid-* \
- | romp-* | rs6000-* | rx-* \
+ | rl78-* | romp-* | rs6000-* | rx-* \
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
@@ -408,10 +421,11 @@
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
| tahoe-* \
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
- | tile-* | tilegx-* \
+ | tile*-* \
| tron-* \
| ubicom32-* \
- | v850-* | v850e-* | vax-* \
+ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+ | vax-* \
| we32k-* \
| x86-* | x86_64-* | xc16x-* | xps100-* \
| xstormy16-* | xtensa*-* \
@@ -711,7 +725,6 @@
i370-ibm* | ibm*)
basic_machine=i370-ibm
;;
-# I'm not sure what "Sysv32" means. Should this be sysv3.2?
i*86v32)
basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
os=-sysv32
@@ -808,10 +821,18 @@
ms1-*)
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
;;
+ msys)
+ basic_machine=i386-pc
+ os=-msys
+ ;;
mvs)
basic_machine=i370-ibm
os=-mvs
;;
+ nacl)
+ basic_machine=le32-unknown
+ os=-nacl
+ ;;
ncr3000)
basic_machine=i486-ncr
os=-sysv4
@@ -1120,13 +1141,8 @@
basic_machine=t90-cray
os=-unicos
;;
- # This must be matched before tile*.
- tilegx*)
- basic_machine=tilegx-unknown
- os=-linux-gnu
- ;;
tile*)
- basic_machine=tile-unknown
+ basic_machine=$basic_machine-unknown
os=-linux-gnu
;;
tx39)
@@ -1336,7 +1352,7 @@
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* | -cegcc* \
- | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
| -mingw32* | -linux-gnu* | -linux-android* \
| -linux-newlib* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* \
@@ -1548,9 +1564,6 @@
;;
m68000-sun)
os=-sunos3
- # This also exists in the configure program, but was not the
- # default.
- # os=-sunos4
;;
m68*-cisco)
os=-aout
diff --git a/auxdir/depcomp b/auxdir/depcomp
index df8eea7..bd0ac08 100755
--- a/auxdir/depcomp
+++ b/auxdir/depcomp
@@ -1,10 +1,10 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
-scriptversion=2009-04-28.21; # UTC
+scriptversion=2011-12-04.11; # UTC
-# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009 Free
-# Software Foundation, Inc.
+# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010,
+# 2011 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -44,7 +44,7 @@
object Object file output by `PROGRAMS ARGS'.
DEPDIR directory where to store dependencies.
depfile Dependency file to output.
- tmpdepfile Temporary file to use when outputing dependencies.
+ tmpdepfile Temporary file to use when outputting dependencies.
libtool Whether libtool is used (yes/no).
Report bugs to <bug-automake@gnu.org>.
@@ -90,10 +90,18 @@
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
- cygpath_u="sed s,\\\\\\\\,/,g"
+ cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
fi
+if test "$depmode" = msvc7msys; then
+ # This is just like msvc7 but w/o cygpath translation.
+ # Just convert the backslash-escaped backslashes to single forward
+ # slashes to satisfy depend.m4
+ cygpath_u='sed s,\\\\,/,g'
+ depmode=msvc7
+fi
+
case "$depmode" in
gcc3)
## gcc 3 implements dependency tracking that does exactly what
@@ -158,10 +166,12 @@
' < "$tmpdepfile" |
## Some versions of gcc put a space before the `:'. On the theory
## that the space means something, we add a space to the output as
-## well.
+## well. hp depmode also adds that space, but also prefixes the VPATH
+## to the object. Take care to not repeat it in the output.
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
+ | sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
@@ -405,6 +415,52 @@
rm -f "$tmpdepfile"
;;
+msvc7)
+ if test "$libtool" = yes; then
+ showIncludes=-Wc,-showIncludes
+ else
+ showIncludes=-showIncludes
+ fi
+ "$@" $showIncludes > "$tmpdepfile"
+ stat=$?
+ grep -v '^Note: including file: ' "$tmpdepfile"
+ if test "$stat" = 0; then :
+ else
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ # The first sed program below extracts the file names and escapes
+ # backslashes for cygpath. The second sed program outputs the file
+ # name when reading, but also accumulates all include files in the
+ # hold buffer in order to output them again at the end. This only
+ # works with sed implementations that can handle large buffers.
+ sed < "$tmpdepfile" -n '
+/^Note: including file: *\(.*\)/ {
+ s//\1/
+ s/\\/\\\\/g
+ p
+}' | $cygpath_u | sort -u | sed -n '
+s/ /\\ /g
+s/\(.*\)/ \1 \\/p
+s/.\(.*\) \\/\1:/
+H
+$ {
+ s/.*/ /
+ G
+ p
+}' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+msvc7msys)
+ # This case exists only to let depend.m4 do its work. It works by
+ # looking at the text of this script. This case will never be run,
+ # since it is checked for above.
+ exit 1
+ ;;
+
#nosideeffect)
# This comment above is used by automake to tell side-effect
# dependency tracking mechanisms from slower ones.
@@ -503,7 +559,9 @@
touch "$tmpdepfile"
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
rm -f "$depfile"
- cat < "$tmpdepfile" > "$depfile"
+ # makedepend may prepend the VPATH from the source file name to the object.
+ # No need to regex-escape $object, excess matching of '.' is harmless.
+ sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
sed '1,2d' "$tmpdepfile" | tr ' ' '
' | \
## Some versions of the HPUX 10.20 sed can't process this invocation
diff --git a/auxdir/install-sh b/auxdir/install-sh
index 6781b98..a9244eb 100755
--- a/auxdir/install-sh
+++ b/auxdir/install-sh
@@ -1,7 +1,7 @@
#!/bin/sh
# install - install a program, script, or datafile
-scriptversion=2009-04-28.21; # UTC
+scriptversion=2011-01-19.21; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
@@ -156,6 +156,10 @@
-s) stripcmd=$stripprog;;
-t) dst_arg=$2
+ # Protect names problematic for `test' and other utilities.
+ case $dst_arg in
+ -* | [=\(\)!]) dst_arg=./$dst_arg;;
+ esac
shift;;
-T) no_target_directory=true;;
@@ -186,6 +190,10 @@
fi
shift # arg
dst_arg=$arg
+ # Protect names problematic for `test' and other utilities.
+ case $dst_arg in
+ -* | [=\(\)!]) dst_arg=./$dst_arg;;
+ esac
done
fi
@@ -200,7 +208,11 @@
fi
if test -z "$dir_arg"; then
- trap '(exit $?); exit' 1 2 13 15
+ do_exit='(exit $ret); exit $ret'
+ trap "ret=129; $do_exit" 1
+ trap "ret=130; $do_exit" 2
+ trap "ret=141; $do_exit" 13
+ trap "ret=143; $do_exit" 15
# Set umask so as not to create temps with too-generous modes.
# However, 'strip' requires both read and write access to temps.
@@ -228,9 +240,9 @@
for src
do
- # Protect names starting with `-'.
+ # Protect names problematic for `test' and other utilities.
case $src in
- -*) src=./$src;;
+ -* | [=\(\)!]) src=./$src;;
esac
if test -n "$dir_arg"; then
@@ -252,12 +264,7 @@
echo "$0: no destination specified." >&2
exit 1
fi
-
dst=$dst_arg
- # Protect names starting with `-'.
- case $dst in
- -*) dst=./$dst;;
- esac
# If destination is a directory, append the input filename; won't work
# if double slashes aren't ignored.
@@ -385,7 +392,7 @@
case $dstdir in
/*) prefix='/';;
- -*) prefix='./';;
+ [-=\(\)!]*) prefix='./';;
*) prefix='';;
esac
@@ -403,7 +410,7 @@
for d
do
- test -z "$d" && continue
+ test X"$d" = X && continue
prefix=$prefix$d
if test -d "$prefix"; then
diff --git a/auxdir/libtool.m4 b/auxdir/libtool.m4
index 8ff3c76..828104c 100644
--- a/auxdir/libtool.m4
+++ b/auxdir/libtool.m4
@@ -1,8 +1,8 @@
# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
#
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-# Inc.
+# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is free software; the Free Software Foundation gives
@@ -11,8 +11,8 @@
m4_define([_LT_COPYING], [dnl
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-# Inc.
+# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is part of GNU Libtool.
@@ -146,6 +146,8 @@
AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl
AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl
+_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl
+dnl
_LT_DECL([], [host_alias], [0], [The host system])dnl
_LT_DECL([], [host], [0])dnl
_LT_DECL([], [host_os], [0])dnl
@@ -637,7 +639,7 @@
m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION])
configured by $[0], generated by m4_PACKAGE_STRING.
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2011 Free Software Foundation, Inc.
This config.lt script is free software; the Free Software Foundation
gives unlimited permision to copy, distribute and modify it."
@@ -801,6 +803,7 @@
m4_case([$1],
[C], [_LT_LANG(C)],
[C++], [_LT_LANG(CXX)],
+ [Go], [_LT_LANG(GO)],
[Java], [_LT_LANG(GCJ)],
[Fortran 77], [_LT_LANG(F77)],
[Fortran], [_LT_LANG(FC)],
@@ -822,6 +825,31 @@
])# _LT_LANG
+m4_ifndef([AC_PROG_GO], [
+############################################################
+# NOTE: This macro has been submitted for inclusion into #
+# GNU Autoconf as AC_PROG_GO. When it is available in #
+# a released version of Autoconf we should remove this #
+# macro and use it instead. #
+############################################################
+m4_defun([AC_PROG_GO],
+[AC_LANG_PUSH(Go)dnl
+AC_ARG_VAR([GOC], [Go compiler command])dnl
+AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl
+_AC_ARG_VAR_LDFLAGS()dnl
+AC_CHECK_TOOL(GOC, gccgo)
+if test -z "$GOC"; then
+ if test -n "$ac_tool_prefix"; then
+ AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo])
+ fi
+fi
+if test -z "$GOC"; then
+ AC_CHECK_PROG(GOC, gccgo, gccgo, false)
+fi
+])#m4_defun
+])#m4_ifndef
+
+
# _LT_LANG_DEFAULT_CONFIG
# -----------------------
m4_defun([_LT_LANG_DEFAULT_CONFIG],
@@ -852,6 +880,10 @@
m4_ifdef([LT_PROG_GCJ],
[m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])])
+AC_PROVIDE_IFELSE([AC_PROG_GO],
+ [LT_LANG(GO)],
+ [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])])
+
AC_PROVIDE_IFELSE([LT_PROG_RC],
[LT_LANG(RC)],
[m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])])
@@ -954,7 +986,13 @@
$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
-dynamiclib -Wl,-single_module conftest.c 2>conftest.err
_lt_result=$?
- if test -f libconftest.dylib && test ! -s conftest.err && test $_lt_result = 0; then
+ # If there is a non-empty error log, and "single_module"
+ # appears in it, assume the flag caused a linker warning
+ if test -s conftest.err && $GREP single_module conftest.err; then
+ cat conftest.err >&AS_MESSAGE_LOG_FD
+ # Otherwise, if the output was created with a 0 exit code from
+ # the compiler, it worked.
+ elif test -f libconftest.dylib && test $_lt_result -eq 0; then
lt_cv_apple_cc_single_mod=yes
else
cat conftest.err >&AS_MESSAGE_LOG_FD
@@ -962,6 +1000,7 @@
rm -rf libconftest.dylib*
rm -f conftest.*
fi])
+
AC_CACHE_CHECK([for -exported_symbols_list linker flag],
[lt_cv_ld_exported_symbols_list],
[lt_cv_ld_exported_symbols_list=no
@@ -973,6 +1012,7 @@
[lt_cv_ld_exported_symbols_list=no])
LDFLAGS="$save_LDFLAGS"
])
+
AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load],
[lt_cv_ld_force_load=no
cat > conftest.c << _LT_EOF
@@ -990,7 +1030,9 @@
echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD
$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
_lt_result=$?
- if test -f conftest && test ! -s conftest.err && test $_lt_result = 0 && $GREP forced_load conftest 2>&1 >/dev/null; then
+ if test -s conftest.err && $GREP force_load conftest.err; then
+ cat conftest.err >&AS_MESSAGE_LOG_FD
+ elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
lt_cv_ld_force_load=yes
else
cat conftest.err >&AS_MESSAGE_LOG_FD
@@ -1035,8 +1077,8 @@
])
-# _LT_DARWIN_LINKER_FEATURES
-# --------------------------
+# _LT_DARWIN_LINKER_FEATURES([TAG])
+# ---------------------------------
# Checks for linker and compiler features on darwin
m4_defun([_LT_DARWIN_LINKER_FEATURES],
[
@@ -1047,6 +1089,8 @@
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
if test "$lt_cv_ld_force_load" = "yes"; then
_LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+ m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes],
+ [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes])
else
_LT_TAGVAR(whole_archive_flag_spec, $1)=''
fi
@@ -1330,14 +1374,27 @@
CFLAGS="$SAVE_CFLAGS"
fi
;;
-sparc*-*solaris*)
+*-*solaris*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
if AC_TRY_EVAL(ac_compile); then
case `/usr/bin/file conftest.o` in
*64-bit*)
case $lt_cv_prog_gnu_ld in
- yes*) LD="${LD-ld} -m elf64_sparc" ;;
+ yes*)
+ case $host in
+ i?86-*-solaris*)
+ LD="${LD-ld} -m elf_x86_64"
+ ;;
+ sparc*-*-solaris*)
+ LD="${LD-ld} -m elf64_sparc"
+ ;;
+ esac
+ # GNU ld 2.21 introduced _sol2 emulations. Use them if available.
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+ LD="${LD-ld}_sol2"
+ fi
+ ;;
*)
if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
LD="${LD-ld} -64"
@@ -1414,13 +1471,13 @@
if test -n "$RANLIB"; then
case $host_os in
openbsd*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
;;
*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
;;
esac
- old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
+ old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
fi
case $host_os in
@@ -1600,6 +1657,11 @@
lt_cv_sys_max_cmd_len=196608
;;
+ os2*)
+ # The test takes a long time on OS/2.
+ lt_cv_sys_max_cmd_len=8192
+ ;;
+
osf*)
# Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
# due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
@@ -1639,7 +1701,7 @@
# If test is not a shell built-in, we'll probably end up computing a
# maximum length that is only half of the actual maximum length, but
# we can't tell.
- while { test "X"`func_fallback_echo "$teststring$teststring" 2>/dev/null` \
+ while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
= "X$teststring$teststring"; } >/dev/null 2>&1 &&
test $i != 17 # 1/2 MB should be enough
do
@@ -2185,7 +2247,7 @@
case $host_os in
aix3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
shlibpath_var=LIBPATH
@@ -2194,7 +2256,7 @@
;;
aix[[4-9]]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
hardcode_into_libs=yes
@@ -2259,7 +2321,7 @@
;;
bsdi[[45]]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
@@ -2398,7 +2460,7 @@
;;
dgux*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -2406,10 +2468,6 @@
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
- dynamic_linker=no
- ;;
-
freebsd* | dragonfly*)
# DragonFly does not have aout. When/if they implement a new
# versioning mechanism, adjust this.
@@ -2417,7 +2475,7 @@
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[[123]]*) objformat=aout ;;
+ freebsd[[23]].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -2435,7 +2493,7 @@
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[[01]]* | freebsdelf3.[[01]]*)
@@ -2455,7 +2513,7 @@
;;
gnu*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
@@ -2466,7 +2524,7 @@
;;
haiku*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
dynamic_linker="$host_os runtime_loader"
@@ -2527,7 +2585,7 @@
;;
interix[[3-9]]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -2543,7 +2601,7 @@
nonstopux*) version_type=nonstopux ;;
*)
if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
else
version_type=irix
fi ;;
@@ -2580,9 +2638,9 @@
dynamic_linker=no
;;
-# This must be Linux ELF.
+# This must be glibc/ELF.
linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -2657,7 +2715,7 @@
;;
newsos6)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
shlibpath_var=LD_LIBRARY_PATH
shlibpath_overrides_runpath=yes
@@ -2726,7 +2784,7 @@
;;
solaris*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -2751,7 +2809,7 @@
;;
sysv4 | sysv4.3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -2775,7 +2833,7 @@
sysv4*MP*)
if test -d /usr/nec ;then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
soname_spec='$libname${shared_ext}.$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -2806,7 +2864,7 @@
tpf*)
# TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -2816,7 +2874,7 @@
;;
uts4*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -3238,7 +3296,7 @@
lt_cv_deplibs_check_method=pass_all
;;
-# This must be Linux ELF.
+# This must be glibc/ELF.
linux* | k*bsd*-gnu | kopensolaris*-gnu)
lt_cv_deplibs_check_method=pass_all
;;
@@ -3658,6 +3716,7 @@
# which start with @ or ?.
lt_cv_sys_global_symbol_pipe="$AWK ['"\
" {last_section=section; section=\$ 3};"\
+" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
" \$ 0!~/External *\|/{next};"\
" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
@@ -4242,7 +4301,9 @@
case $cc_basename in
nvcc*) # Cuda Compiler Driver 2.2
_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker '
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Xcompiler -fPIC'
+ if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)"
+ fi
;;
esac
else
@@ -4334,18 +4395,33 @@
;;
*)
case `$CC -V 2>&1 | sed 5q` in
- *Sun\ F* | *Sun*Fortran*)
+ *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*)
# Sun Fortran 8.3 passes all unrecognized flags to the linker
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
_LT_TAGVAR(lt_prog_compiler_wl, $1)=''
;;
+ *Sun\ F* | *Sun*Fortran*)
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+ ;;
*Sun\ C*)
# Sun C 5.9
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
;;
+ *Intel*\ [[CF]]*Compiler*)
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ ;;
+ *Portland\ Group*)
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
esac
;;
esac
@@ -4505,7 +4581,9 @@
;;
cygwin* | mingw* | cegcc*)
case $cc_basename in
- cl*) ;;
+ cl*)
+ _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+ ;;
*)
_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
_LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
@@ -4533,7 +4611,6 @@
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
- _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
@@ -4787,8 +4864,7 @@
xlf* | bgf* | bgxlf* | mpixlf*)
# IBM XL Fortran 10.1 on PPC cannot create shared libs itself
_LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
- _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='-rpath $libdir'
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
_LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
if test "x$supports_anon_versioning" = xyes; then
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
@@ -5084,6 +5160,7 @@
# The linker will not automatically build a static lib if we build a DLL.
# _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+ _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols'
# Don't use ranlib
_LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
@@ -5130,10 +5207,6 @@
_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
;;
- freebsd1*)
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
-
# FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
# support. Future versions do this automatically, but an explicit c++rt0.o
# does not break anything, and helps significantly (at the cost of a little
@@ -5146,7 +5219,7 @@
;;
# Unfortunately, older versions of FreeBSD 2 do not have this feature.
- freebsd2*)
+ freebsd2.*)
_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
_LT_TAGVAR(hardcode_direct, $1)=yes
_LT_TAGVAR(hardcode_minus_L, $1)=yes
@@ -5185,7 +5258,6 @@
fi
if test "$with_gnu_ld" = no; then
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir'
_LT_TAGVAR(hardcode_libdir_separator, $1)=:
_LT_TAGVAR(hardcode_direct, $1)=yes
_LT_TAGVAR(hardcode_direct_absolute, $1)=yes
@@ -5627,9 +5699,6 @@
_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1],
[Flag to hardcode $libdir into a binary during linking.
This must work even if $libdir does not exist])
-_LT_TAGDECL([], [hardcode_libdir_flag_spec_ld], [1],
- [[If ld is used when linking, flag to hardcode $libdir into a binary
- during linking. This must work even if $libdir does not exist]])
_LT_TAGDECL([], [hardcode_libdir_separator], [1],
[Whether we need a single "-rpath" flag with a separated argument])
_LT_TAGDECL([], [hardcode_direct], [0],
@@ -5787,7 +5856,6 @@
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
@@ -6157,7 +6225,7 @@
esac
;;
- freebsd[[12]]*)
+ freebsd2.*)
# C++ shared libraries reported to be fairly broken before
# switch to ELF
_LT_TAGVAR(ld_shlibs, $1)=no
@@ -6918,12 +6986,18 @@
}
};
_LT_EOF
+], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF
+package foo
+func foo() {
+}
+_LT_EOF
])
_lt_libdeps_save_CFLAGS=$CFLAGS
case "$CC $CFLAGS " in #(
*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
esac
dnl Parse the compiler output and extract the necessary
@@ -7120,7 +7194,6 @@
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_automatic, $1)=no
@@ -7253,7 +7326,6 @@
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_automatic, $1)=no
@@ -7440,6 +7512,77 @@
])# _LT_LANG_GCJ_CONFIG
+# _LT_LANG_GO_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for the GNU Go compiler
+# are suitably defined. These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_GO_CONFIG],
+[AC_REQUIRE([LT_PROG_GO])dnl
+AC_LANG_SAVE
+
+# Source file extension for Go test sources.
+ac_ext=go
+
+# Object file extension for compiled Go test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="package main; func main() { }"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='package main; func main() { }'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_TAG_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC=$CC
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+GCC=yes
+CC=${GOC-"gccgo"}
+CFLAGS=$GOFLAGS
+compiler=$CC
+_LT_TAGVAR(compiler, $1)=$CC
+_LT_TAGVAR(LD, $1)="$LD"
+_LT_CC_BASENAME([$compiler])
+
+# Go did not exist at the time GCC didn't implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+if test -n "$compiler"; then
+ _LT_COMPILER_NO_RTTI($1)
+ _LT_COMPILER_PIC($1)
+ _LT_COMPILER_C_O($1)
+ _LT_COMPILER_FILE_LOCKS($1)
+ _LT_LINKER_SHLIBS($1)
+ _LT_LINKER_HARDCODE_LIBPATH($1)
+
+ _LT_CONFIG($1)
+fi
+
+AC_LANG_RESTORE
+
+GCC=$lt_save_GCC
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+])# _LT_LANG_GO_CONFIG
+
+
# _LT_LANG_RC_CONFIG([TAG])
# -------------------------
# Ensure that the configuration variables for the Windows resource compiler
@@ -7509,6 +7652,13 @@
dnl AC_DEFUN([LT_AC_PROG_GCJ], [])
+# LT_PROG_GO
+# ----------
+AC_DEFUN([LT_PROG_GO],
+[AC_CHECK_TOOL(GOC, gccgo,)
+])
+
+
# LT_PROG_RC
# ----------
AC_DEFUN([LT_PROG_RC],
diff --git a/auxdir/ltmain.sh b/auxdir/ltmain.sh
old mode 100755
new mode 100644
index b4a3231..c2852d8
--- a/auxdir/ltmain.sh
+++ b/auxdir/ltmain.sh
@@ -1,9 +1,9 @@
-# libtool (GNU libtool) 2.4
+# libtool (GNU libtool) 2.4.2
# Written by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006,
-# 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
# This is free software; see the source for copying conditions. There is NO
# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
@@ -41,6 +41,7 @@
# --quiet, --silent don't print informational messages
# --no-quiet, --no-silent
# print informational messages (default)
+# --no-warn don't display warning messages
# --tag=TAG use configuration variables from tag TAG
# -v, --verbose print more informational messages than default
# --no-verbose don't print the extra informational messages
@@ -69,7 +70,7 @@
# compiler: $LTCC
# compiler flags: $LTCFLAGS
# linker: $LD (gnu? $with_gnu_ld)
-# $progname: (GNU libtool) 2.4 Debian-2.4-2ubuntu1
+# $progname: (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1
# automake: $automake_version
# autoconf: $autoconf_version
#
@@ -79,9 +80,9 @@
PROGRAM=libtool
PACKAGE=libtool
-VERSION="2.4 Debian-2.4-2ubuntu1"
+VERSION="2.4.2 Debian-2.4.2-1ubuntu1"
TIMESTAMP=""
-package_revision=1.3293
+package_revision=1.3337
# Be Bourne compatible
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
@@ -136,15 +137,10 @@
: ${CP="cp -f"}
test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'}
-: ${EGREP="/bin/grep -E"}
-: ${FGREP="/bin/grep -F"}
-: ${GREP="/bin/grep"}
-: ${LN_S="ln -s"}
: ${MAKE="make"}
: ${MKDIR="mkdir"}
: ${MV="mv -f"}
: ${RM="rm -f"}
-: ${SED="/bin/sed"}
: ${SHELL="${CONFIG_SHELL-/bin/sh}"}
: ${Xsed="$SED -e 1s/^X//"}
@@ -387,7 +383,7 @@
;;
*)
save_IFS="$IFS"
- IFS=:
+ IFS=${PATH_SEPARATOR-:}
for progdir in $PATH; do
IFS="$save_IFS"
test -x "$progdir/$progname" && break
@@ -771,8 +767,8 @@
s*\$LTCFLAGS*'"$LTCFLAGS"'*
s*\$LD*'"$LD"'*
s/\$with_gnu_ld/'"$with_gnu_ld"'/
- s/\$automake_version/'"`(automake --version) 2>/dev/null |$SED 1q`"'/
- s/\$autoconf_version/'"`(autoconf --version) 2>/dev/null |$SED 1q`"'/
+ s/\$automake_version/'"`(${AUTOMAKE-automake} --version) 2>/dev/null |$SED 1q`"'/
+ s/\$autoconf_version/'"`(${AUTOCONF-autoconf} --version) 2>/dev/null |$SED 1q`"'/
p
d
}
@@ -1052,6 +1048,7 @@
opt_help=false
opt_help_all=false
opt_silent=:
+opt_warning=:
opt_verbose=:
opt_silent=false
opt_verbose=false
@@ -1120,6 +1117,10 @@
opt_silent=false
func_append preserve_args " $opt"
;;
+ --no-warning|--no-warn)
+ opt_warning=false
+func_append preserve_args " $opt"
+ ;;
--no-verbose)
opt_verbose=false
func_append preserve_args " $opt"
@@ -2059,7 +2060,7 @@
*.[cCFSifmso] | \
*.ada | *.adb | *.ads | *.asm | \
*.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \
- *.[fF][09]? | *.for | *.java | *.obj | *.sx | *.cu | *.cup)
+ *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup)
func_xform "$libobj"
libobj=$func_xform_result
;;
@@ -3201,11 +3202,13 @@
# Set up the ranlib parameters.
oldlib="$destdir/$name"
+ func_to_tool_file "$oldlib" func_convert_file_msys_to_w32
+ tool_oldlib=$func_to_tool_file_result
func_show_eval "$install_prog \$file \$oldlib" 'exit $?'
if test -n "$stripme" && test -n "$old_striplib"; then
- func_show_eval "$old_striplib $oldlib" 'exit $?'
+ func_show_eval "$old_striplib $tool_oldlib" 'exit $?'
fi
# Do each command in the postinstall commands.
@@ -3470,7 +3473,7 @@
# linked before any other PIC object. But we must not use
# pic_flag when linking with -static. The problem exists in
# FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
- *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
+ *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;;
*-*-hpux*)
pic_flag_for_symtable=" $pic_flag" ;;
@@ -3982,14 +3985,17 @@
# launches target application with the remaining arguments.
func_exec_program ()
{
- for lt_wr_arg
- do
- case \$lt_wr_arg in
- --lt-*) ;;
- *) set x \"\$@\" \"\$lt_wr_arg\"; shift;;
- esac
- shift
- done
+ case \" \$* \" in
+ *\\ --lt-*)
+ for lt_wr_arg
+ do
+ case \$lt_wr_arg in
+ --lt-*) ;;
+ *) set x \"\$@\" \"\$lt_wr_arg\"; shift;;
+ esac
+ shift
+ done ;;
+ esac
func_exec_program_core \${1+\"\$@\"}
}
@@ -5057,9 +5063,15 @@
{
EOF
func_emit_wrapper yes |
- $SED -e 's/\([\\"]\)/\\\1/g' \
- -e 's/^/ fputs ("/' -e 's/$/\\n", f);/'
-
+ $SED -n -e '
+s/^\(.\{79\}\)\(..*\)/\1\
+\2/
+h
+s/\([\\"]\)/\\\1/g
+s/$/\\n/
+s/\([^\n]*\).*/ fputs ("\1", f);/p
+g
+D'
cat <<"EOF"
}
EOF
@@ -5643,7 +5655,8 @@
continue
;;
- -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe|-threads)
+ -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \
+ |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*)
func_append compiler_flags " $arg"
func_append compile_command " $arg"
func_append finalize_command " $arg"
@@ -6150,7 +6163,8 @@
lib=
found=no
case $deplib in
- -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe|-threads)
+ -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \
+ |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*)
if test "$linkmode,$pass" = "prog,link"; then
compile_deplibs="$deplib $compile_deplibs"
finalize_deplibs="$deplib $finalize_deplibs"
@@ -6834,7 +6848,7 @@
test "$hardcode_direct_absolute" = no; then
add="$dir/$linklib"
elif test "$hardcode_minus_L" = yes; then
- add_dir="-L$dir"
+ add_dir="-L$absdir"
# Try looking first in the location we're being installed to.
if test -n "$inst_prefix_dir"; then
case $libdir in
@@ -7319,6 +7333,7 @@
# which has an extra 1 added just for fun
#
case $version_type in
+ # correct linux to gnu/linux during the next big refactor
darwin|linux|osf|windows|none)
func_arith $number_major + $number_minor
current=$func_arith_result
@@ -7438,7 +7453,7 @@
versuffix="$major.$revision"
;;
- linux)
+ linux) # correct to gnu/linux during the next big refactor
func_arith $current - $age
major=.$func_arith_result
versuffix="$major.$age.$revision"
@@ -8026,6 +8041,11 @@
# Test again, we may have decided not to build it any more
if test "$build_libtool_libs" = yes; then
+ # Remove ${wl} instances when linking with ld.
+ # FIXME: should test the right _cmds variable.
+ case $archive_cmds in
+ *\$LD\ *) wl= ;;
+ esac
if test "$hardcode_into_libs" = yes; then
# Hardcode the library paths
hardcode_libdirs=
@@ -8056,7 +8076,7 @@
elif test -n "$runpath_var"; then
case "$perm_rpath " in
*" $libdir "*) ;;
- *) func_apped perm_rpath " $libdir" ;;
+ *) func_append perm_rpath " $libdir" ;;
esac
fi
done
@@ -8064,11 +8084,7 @@
if test -n "$hardcode_libdir_separator" &&
test -n "$hardcode_libdirs"; then
libdir="$hardcode_libdirs"
- if test -n "$hardcode_libdir_flag_spec_ld"; then
- eval dep_rpath=\"$hardcode_libdir_flag_spec_ld\"
- else
- eval dep_rpath=\"$hardcode_libdir_flag_spec\"
- fi
+ eval "dep_rpath=\"$hardcode_libdir_flag_spec\""
fi
if test -n "$runpath_var" && test -n "$perm_rpath"; then
# We should set the runpath_var.
@@ -9158,6 +9174,8 @@
esac
done
fi
+ func_to_tool_file "$oldlib" func_convert_file_msys_to_w32
+ tool_oldlib=$func_to_tool_file_result
eval cmds=\"$old_archive_cmds\"
func_len " $cmds"
@@ -9267,7 +9285,8 @@
*.la)
func_basename "$deplib"
name="$func_basename_result"
- eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+ func_resolve_sysroot "$deplib"
+ eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result`
test -z "$libdir" && \
func_fatal_error "\`$deplib' is not a valid libtool archive"
func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name"
diff --git a/auxdir/ltoptions.m4 b/auxdir/ltoptions.m4
index 17cfd51..5d9acd8 100644
--- a/auxdir/ltoptions.m4
+++ b/auxdir/ltoptions.m4
@@ -326,9 +326,24 @@
# MODE is either `yes' or `no'. If omitted, it defaults to `both'.
m4_define([_LT_WITH_PIC],
[AC_ARG_WITH([pic],
- [AS_HELP_STRING([--with-pic],
+ [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
- [pic_mode="$withval"],
+ [lt_p=${PACKAGE-default}
+ case $withval in
+ yes|no) pic_mode=$withval ;;
+ *)
+ pic_mode=default
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for lt_pkg in $withval; do
+ IFS="$lt_save_ifs"
+ if test "X$lt_pkg" = "X$lt_p"; then
+ pic_mode=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac],
[pic_mode=default])
test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
diff --git a/auxdir/ltversion.m4 b/auxdir/ltversion.m4
index 9c7b5d4..07a8602 100644
--- a/auxdir/ltversion.m4
+++ b/auxdir/ltversion.m4
@@ -9,15 +9,15 @@
# @configure_input@
-# serial 3293 ltversion.m4
+# serial 3337 ltversion.m4
# This file is part of GNU Libtool
-m4_define([LT_PACKAGE_VERSION], [2.4])
-m4_define([LT_PACKAGE_REVISION], [1.3293])
+m4_define([LT_PACKAGE_VERSION], [2.4.2])
+m4_define([LT_PACKAGE_REVISION], [1.3337])
AC_DEFUN([LTVERSION_VERSION],
-[macro_version='2.4'
-macro_revision='1.3293'
+[macro_version='2.4.2'
+macro_revision='1.3337'
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
_LT_DECL(, macro_revision, 0)
])
diff --git a/auxdir/missing b/auxdir/missing
index 28055d2..86a8fc3 100755
--- a/auxdir/missing
+++ b/auxdir/missing
@@ -1,10 +1,10 @@
#! /bin/sh
# Common stub for a few missing GNU programs while installing.
-scriptversion=2009-04-28.21; # UTC
+scriptversion=2012-01-06.13; # UTC
# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006,
-# 2008, 2009 Free Software Foundation, Inc.
+# 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
@@ -84,7 +84,6 @@
help2man touch the output file
lex create \`lex.yy.c', if possible, from existing .c
makeinfo touch the output file
- tar try tar, gnutar, gtar, then tar without non-portable flags
yacc create \`y.tab.[ch]', if possible, from existing .[ch]
Version suffixes to PROGRAM as well as the prefixes \`gnu-', \`gnu', and
@@ -122,15 +121,6 @@
# Not GNU programs, they don't have --version.
;;
- tar*)
- if test -n "$run"; then
- echo 1>&2 "ERROR: \`tar' requires --run"
- exit 1
- elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
- exit 1
- fi
- ;;
-
*)
if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
# We have it, but it failed.
@@ -226,7 +216,7 @@
\`Bison' from any GNU archive site."
rm -f y.tab.c y.tab.h
if test $# -ne 1; then
- eval LASTARG="\${$#}"
+ eval LASTARG=\${$#}
case $LASTARG in
*.y)
SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
@@ -256,7 +246,7 @@
\`Flex' from any GNU archive site."
rm -f lex.yy.c
if test $# -ne 1; then
- eval LASTARG="\${$#}"
+ eval LASTARG=\${$#}
case $LASTARG in
*.l)
SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
@@ -318,41 +308,6 @@
touch $file
;;
- tar*)
- shift
-
- # We have already tried tar in the generic part.
- # Look for gnutar/gtar before invocation to avoid ugly error
- # messages.
- if (gnutar --version > /dev/null 2>&1); then
- gnutar "$@" && exit 0
- fi
- if (gtar --version > /dev/null 2>&1); then
- gtar "$@" && exit 0
- fi
- firstarg="$1"
- if shift; then
- case $firstarg in
- *o*)
- firstarg=`echo "$firstarg" | sed s/o//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- case $firstarg in
- *h*)
- firstarg=`echo "$firstarg" | sed s/h//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- fi
-
- echo 1>&2 "\
-WARNING: I can't seem to be able to run \`tar' with the given arguments.
- You may want to install GNU tar or Free paxutils, or check the
- command line arguments."
- exit 1
- ;;
-
*)
echo 1>&2 "\
WARNING: \`$1' is needed, and is $msg.
diff --git a/auxdir/slurm.m4 b/auxdir/slurm.m4
index 3191108..8d1b9b6 100644
--- a/auxdir/slurm.m4
+++ b/auxdir/slurm.m4
@@ -72,20 +72,23 @@
dnl
AC_DEFUN([X_AC_DIMENSIONS], [
AC_MSG_CHECKING([System dimensions])
- AC_ARG_WITH(
- [dimensions],
+ AC_ARG_WITH([dimensions],
AS_HELP_STRING(--with-dimensions=N, set system dimension count for generic computer system),
[ if test `expr match "$withval" '[[0-9]]*$'` -gt 0; then
dimensions="$withval"
x_ac_dimensions=yes
fi
- ]
+ ],
+ [x_ac_dimensions=no]
)
if test "$x_ac_dimensions" = yes; then
if test $dimensions -lt 1; then
AC_MSG_ERROR([Invalid dimensions value $dimensions])
fi
+ AC_MSG_RESULT([$dimensions]);
AC_DEFINE_UNQUOTED(SYSTEM_DIMENSIONS, [$dimensions], [Define system dimension count])
+ else
+ AC_MSG_RESULT([not set]);
fi
])
diff --git a/auxdir/x_ac_bluegene.m4 b/auxdir/x_ac_bluegene.m4
index 717ef14..812327a 100644
--- a/auxdir/x_ac_bluegene.m4
+++ b/auxdir/x_ac_bluegene.m4
@@ -90,7 +90,7 @@
if test ! -z "$have_bg_files" ; then
BG_INCLUDES="$bg_includes"
- CFLAGS="$CFLAGS -m64"
+ CFLAGS="$CFLAGS -m64 --std=gnu99"
CXXFLAGS="$CXXFLAGS $CFLAGS"
AC_DEFINE(HAVE_3D, 1, [Define to 1 if 3-dimensional architecture])
AC_DEFINE(SYSTEM_DIMENSIONS, 3, [3-dimensional architecture])
@@ -231,6 +231,7 @@
libname=bgsched
loglibname=log4cxx
+ runjoblibname=runjob_client
for bg_dir in $trydb2dir "" $bg_default_dirs; do
# Skip directories that don't exist
@@ -259,7 +260,18 @@
fi
fi
- # Search for headers in the directory
+ soloc=$bg_dir/hlcs/lib/lib$runjoblibname.so
+ # Search for required BG API libraries in the directory
+ if test -z "$have_bg_ar" -a -f "$soloc" ; then
+ have_bgq_ar=yes
+ if test "$ac_with_rpath" = "yes"; then
+ runjob_ldflags="$runjob_ldflags -Wl,-rpath -Wl,$bg_dir/hlcs/lib -L$bg_dir/hlcs/lib -l$runjoblibname"
+ else
+ runjob_ldflags="$runjob_ldflags -L$bg_dir/hlcs/lib -l$runjoblibname"
+ fi
+ fi
+
+ # Search for headers in the directory
if test -z "$have_bg_hdr" -a -f "$bg_dir/hlcs/include/bgsched/bgsched.h" ; then
have_bgq_hdr=yes
bg_includes="-I$bg_dir/hlcs/include"
@@ -283,12 +295,31 @@
log4cxx::LoggerPtr logger_ptr(log4cxx::Logger::getLogger( "ibm" ));]])],
[have_bgq_files=yes],
[AC_MSG_ERROR(There is a problem linking to the BG/Q api.)])
+ # In later versions of the driver IBM added a better function
+ # to see if blocks were IO connected or not. Here is a check
+ # to not break backwards compatibility
+ AC_LINK_IFELSE([AC_LANG_PROGRAM(
+ [[#include <bgsched/bgsched.h>
+ #include <bgsched/Block.h>]],
+ [[ bgsched::Block::checkIO("", NULL, NULL);]])],
+ [have_bgq_new_io_check=yes],
+ [AC_MSG_RESULT(Using old iocheck.)])
+ # In later versions of the driver IBM added an "action" to a
+ # block. Here is a check to not break backwards compatibility
+ AC_LINK_IFELSE([AC_LANG_PROGRAM(
+ [[#include <bgsched/bgsched.h>
+ #include <bgsched/Block.h>]],
+ [[ bgsched::Block::Ptr block_ptr;
+ block_ptr->getAction();]])],
+ [have_bgq_get_action=yes],
+ [AC_MSG_RESULT(Blocks do not have actions!)])
AC_LANG_POP(C++)
LDFLAGS="$saved_LDFLAGS"
fi
if test ! -z "$have_bgq_files" ; then
BG_LDFLAGS="$bg_ldflags"
+ RUNJOB_LDFLAGS="$runjob_ldflags"
BG_INCLUDES="$bg_includes"
CFLAGS="$CFLAGS -m64"
CXXFLAGS="$CXXFLAGS $CFLAGS"
@@ -299,6 +330,13 @@
AC_DEFINE(HAVE_FRONT_END, 1, [Define to 1 if running slurmd on front-end only])
AC_DEFINE(HAVE_BG_FILES, 1, [Define to 1 if have Blue Gene files])
#AC_DEFINE_UNQUOTED(BG_BRIDGE_SO, "$soloc", [Define the BG_BRIDGE_SO value])
+ if test ! -z "$have_bgq_new_io_check" ; then
+ AC_DEFINE(HAVE_BG_NEW_IO_CHECK, 1, [Define to 1 if using code with new iocheck])
+ fi
+
+ if test ! -z "$have_bgq_get_action" ; then
+ AC_DEFINE(HAVE_BG_GET_ACTION, 1, [Define to 1 if using code where blocks have actions])
+ fi
AC_MSG_NOTICE([Running on a legitimate BG/Q system])
# AC_MSG_CHECKING(for BG serial value)
@@ -314,4 +352,5 @@
AC_SUBST(BG_INCLUDES)
AC_SUBST(BG_LDFLAGS)
+ AC_SUBST(RUNJOB_LDFLAGS)
])
diff --git a/auxdir/x_ac_cflags.m4 b/auxdir/x_ac_cflags.m4
index 09ed0b2..a9a616d 100644
--- a/auxdir/x_ac_cflags.m4
+++ b/auxdir/x_ac_cflags.m4
@@ -8,17 +8,17 @@
# X_AC_CFLAGS
#
# DESCRIPTION:
-# Add extra cflags
+# Add extra cflags
##*****************************************************************************
AC_DEFUN([X_AC_CFLAGS],
[
# This is here to avoid a bug in the gcc compiler 3.4.6
- # Without this flag there is a bug when pointing to other functions
+ # Without this flag there is a bug when pointing to other functions
# and then using them. It is also advised to set the flag if there
# are goto statements you may get better performance.
- if test "$GCC" == "yes"; then
- CFLAGS="$CFLAGS -fno-gcse"
+ if test "$GCC" = yes; then
+ CFLAGS="$CFLAGS -fno-gcse"
fi
])
diff --git a/auxdir/x_ac_databases.m4 b/auxdir/x_ac_databases.m4
index f73124c..e08fb1c 100644
--- a/auxdir/x_ac_databases.m4
+++ b/auxdir/x_ac_databases.m4
@@ -64,7 +64,7 @@
[ac_have_mysql="no"])
CFLAGS="$save_CFLAGS"
LIBS="$save_LIBS"
- if test "$ac_have_mysql" == "yes"; then
+ if test "$ac_have_mysql" = yes; then
AC_MSG_RESULT([MySQL test program built properly.])
AC_SUBST(MYSQL_LIBS)
AC_SUBST(MYSQL_CFLAGS)
@@ -89,7 +89,7 @@
CFLAGS="$save_CFLAGS"
LIBS="$save_LIBS"
- if test "$ac_have_mysql" == "yes"; then
+ if test "$ac_have_mysql" = yes; then
AC_MSG_RESULT([MySQL (non-threaded) test program built properly.])
AC_SUBST(MYSQL_LIBS)
AC_SUBST(MYSQL_CFLAGS)
@@ -103,7 +103,7 @@
fi
fi
fi
- AM_CONDITIONAL(WITH_MYSQL, test x"$ac_have_mysql" == x"yes")
+ AM_CONDITIONAL(WITH_MYSQL, test x"$ac_have_mysql" = x"yes")
#Check for PostgreSQL
ac_have_postgres="no"
@@ -145,7 +145,7 @@
[ac_have_pgsql="no"])
LIBS="$save_LIBS"
CFLAGS="$save_CFLAGS"
- if test "$ac_have_pgsql" == "yes"; then
+ if test "$ac_have_pgsql" = "yes"; then
AC_MSG_RESULT([PostgreSQL test program built properly.])
AC_SUBST(PGSQL_LIBS)
AC_SUBST(PGSQL_CFLAGS)
@@ -156,6 +156,6 @@
AC_MSG_WARN([*** PostgreSQL test program execution failed.])
fi
fi
- AM_CONDITIONAL(WITH_PGSQL, test x"$ac_have_pgsql" == x"yes")
+ AM_CONDITIONAL(WITH_PGSQL, test x"$ac_have_pgsql" = x"yes")
])
diff --git a/auxdir/x_ac_man2html.m4 b/auxdir/x_ac_man2html.m4
index d856566..255482e 100644
--- a/auxdir/x_ac_man2html.m4
+++ b/auxdir/x_ac_man2html.m4
@@ -15,7 +15,7 @@
AC_MSG_CHECKING([whether man2html is available])
AC_CHECK_PROG(ac_have_man2html, man2html, [yes], [no], [$bindir:/usr/bin:/usr/local/bin])
- AM_CONDITIONAL(HAVE_MAN2HTML, test "x$ac_have_man2html" == "xyes")
+ AM_CONDITIONAL(HAVE_MAN2HTML, test "x$ac_have_man2html" = "xyes")
if test "x$ac_have_man2html" != "xyes" ; then
AC_MSG_NOTICE([Unable to build man page html files without man2html])
diff --git a/auxdir/x_ac_munge.m4 b/auxdir/x_ac_munge.m4
index 68d913b..7b1f09e 100644
--- a/auxdir/x_ac_munge.m4
+++ b/auxdir/x_ac_munge.m4
@@ -54,6 +54,7 @@
else
MUNGE_LIBS="-lmunge"
MUNGE_CPPFLAGS="-I$x_ac_cv_munge_dir/include"
+ MUNGE_DIR="$x_ac_cv_munge_dir"
if test "$ac_with_rpath" = "yes"; then
MUNGE_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_munge_dir/$bit -L$x_ac_cv_munge_dir/$bit"
else
@@ -64,6 +65,7 @@
AC_SUBST(MUNGE_LIBS)
AC_SUBST(MUNGE_CPPFLAGS)
AC_SUBST(MUNGE_LDFLAGS)
+ AC_SUBST(MUNGE_DIR)
AM_CONDITIONAL(WITH_MUNGE, test -n "$x_ac_cv_munge_dir")
])
diff --git a/auxdir/x_ac_ncurses.m4 b/auxdir/x_ac_ncurses.m4
index d9fe5e7..72c1380 100644
--- a/auxdir/x_ac_ncurses.m4
+++ b/auxdir/x_ac_ncurses.m4
@@ -28,22 +28,22 @@
ac_have_some_curses="yes"
elif test "$ac_have_curses" = "yes"; then
NCURSES="-lcurses"
- NCURSES_HEADER="curses.h"
+ NCURSES_HEADER="curses.h"
ac_have_some_curses="yes"
fi
- if test "$ac_have_some_curses" = "yes"; then
- save_LIBS="$LIBS"
- LIBS="$NCURSES $save_LIBS"
- AC_TRY_LINK([#include <${NCURSES_HEADER}>],
- [(void)initscr(); (void)endwin();],
- [], [ac_have_some_curses="no"])
- LIBS="$save_LIBS"
- if test "$ac_have_some_curses" == "yes"; then
- AC_MSG_RESULT([NCURSES test program built properly.])
- else
- AC_MSG_WARN([*** NCURSES test program execution failed.])
- fi
+ if test "$ac_have_some_curses" = "yes"; then
+ save_LIBS="$LIBS"
+ LIBS="$NCURSES $save_LIBS"
+ AC_TRY_LINK([#include <${NCURSES_HEADER}>],
+ [(void)initscr(); (void)endwin();],
+ [], [ac_have_some_curses="no"])
+ LIBS="$save_LIBS"
+ if test "$ac_have_some_curses" = "yes"; then
+ AC_MSG_RESULT([NCURSES test program built properly.])
+ else
+ AC_MSG_WARN([*** NCURSES test program execution failed.])
+ fi
else
AC_MSG_WARN([Can not build smap without curses or ncurses library])
ac_have_some_curses="no"
diff --git a/auxdir/x_ac_printf_null.m4 b/auxdir/x_ac_printf_null.m4
index dbe47a3..466bd3a 100644
--- a/auxdir/x_ac_printf_null.m4
+++ b/auxdir/x_ac_printf_null.m4
@@ -47,13 +47,13 @@
*) have_solaris=no ;;
esac
- if test "$printf_null_ok" == "no" -a "$have_solaris" == "yes" -a -d /usr/lib64/0@0.so.1; then
+ if test "$printf_null_ok" = "no" -a "$have_solaris" = "yes" -a -d /usr/lib64/0@0.so.1; then
AC_MSG_ERROR([printf("%s", NULL) results in abort, upgrade to OpenSolaris release 119 or set LD_PRELOAD=/usr/lib64/0@0.so.1])
- elif test "$printf_null_ok" == "no" -a "$have_solaris" == "yes" -a -d /usr/lib/0@0.so.1; then
+ elif test "$printf_null_ok" = "no" -a "$have_solaris" = "yes" -a -d /usr/lib/0@0.so.1; then
AC_MSG_ERROR([printf("%s", NULL) results in abort, upgrade to OpenSolaris release 119 or set LD_PRELOAD=/usr/lib/0@0.so.1])
- elif test "$printf_null_ok" == "no" -a "$have_solaris" == "yes"; then
+ elif test "$printf_null_ok" = "no" -a "$have_solaris" = "yes"; then
AC_MSG_ERROR([printf("%s", NULL) results in abort, upgrade to OpenSolaris release 119])
- elif test "$printf_null_ok" == "no"; then
+ elif test "$printf_null_ok" = "no"; then
AC_MSG_ERROR([printf("%s", NULL) results in abort])
else
AC_MSG_RESULT([yes])
diff --git a/config.h.in b/config.h.in
index 9c265b1..0979d54 100644
--- a/config.h.in
+++ b/config.h.in
@@ -60,9 +60,15 @@
/* Define to 1 if have Blue Gene files */
#undef HAVE_BG_FILES
+/* Define to 1 if using code where blocks have actions */
+#undef HAVE_BG_GET_ACTION
+
/* Define to 1 if emulating or running on Blue Gene/L or P system */
#undef HAVE_BG_L_P
+/* Define to 1 if using code with new iocheck */
+#undef HAVE_BG_NEW_IO_CHECK
+
/* Define to 1 if you have the `cfmakeraw' function. */
#undef HAVE_CFMAKERAW
@@ -157,6 +163,9 @@
/* Define to 1 if you have the <linux/sched.h> header file. */
#undef HAVE_LINUX_SCHED_H
+/* Define to 1 if you have the <llapi.h> header file. */
+#undef HAVE_LLAPI_H
+
/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
to 0 otherwise. */
#undef HAVE_MALLOC
@@ -179,6 +188,9 @@
/* Define to 1 if you have the <netdb.h> header file. */
#undef HAVE_NETDB_H
+/* Define to 1 if you have the <nrt.h> header file. */
+#undef HAVE_NRT_H
+
/* define if numa library installed */
#undef HAVE_NUMA
diff --git a/configure b/configure
index 47febac..70f1d09 100755
--- a/configure
+++ b/configure
@@ -612,6 +612,10 @@
ac_subst_vars='am__EXEEXT_FALSE
am__EXEEXT_TRUE
LTLIBOBJS
+BUILD_SVIEW_FALSE
+BUILD_SVIEW_TRUE
+BUILD_SMAP_FALSE
+BUILD_SMAP_TRUE
BUILD_SRUN2APRUN_FALSE
BUILD_SRUN2APRUN_TRUE
WITH_BLCR_FALSE
@@ -627,6 +631,7 @@
AUTHD_LIBS
WITH_MUNGE_FALSE
WITH_MUNGE_TRUE
+MUNGE_DIR
MUNGE_LDFLAGS
MUNGE_CPPFLAGS
MUNGE_LIBS
@@ -756,6 +761,7 @@
BGQ_LOADED
BGQ_LOADED_FALSE
BGQ_LOADED_TRUE
+RUNJOB_LDFLAGS
BG_LDFLAGS
am__fastdepCXX_FALSE
am__fastdepCXX_TRUE
@@ -776,6 +782,7 @@
am__fastdepCC_FALSE
am__fastdepCC_TRUE
CCDEPMODE
+am__nodep
AMDEPBACKSLASH
AMDEP_FALSE
AMDEP_TRUE
@@ -1612,7 +1619,7 @@
--with-bg-serial=NAME set BG_SERIAL value
--with-proctrack=PATH Specify path to proctrack sources
- --with-pic try to use only PIC/non-PIC objects [default=use
+ --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use
both]
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
--with-sysroot=DIR Search for dependent libraries within DIR
@@ -3357,11 +3364,11 @@
# We need awk for the "check" target. The system "awk" is bad on
# some platforms.
-# Always define AMTAR for backward compatibility.
+# Always define AMTAR for backward compatibility. Yes, it's still used
+# in the wild :-( We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'
-AMTAR=${AMTAR-"${am_missing_run}tar"}
-
-am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
@@ -3469,6 +3476,7 @@
if test "x$enable_dependency_tracking" != xno; then
am_depcomp="$ac_aux_dir/depcomp"
AMDEPBACKSLASH='\'
+ am__nodep='_no'
fi
if test "x$enable_dependency_tracking" != xno; then
AMDEP_TRUE=
@@ -4282,6 +4290,7 @@
# instance it was reported that on HP-UX the gcc test will end up
# making a dummy file named `D' -- because `-MD' means `put the output
# in D'.
+ rm -rf conftest.dir
mkdir conftest.dir
# Copy depcomp to subdir because otherwise we won't find it if we're
# using a relative directory.
@@ -4341,7 +4350,7 @@
break
fi
;;
- msvisualcpp | msvcmsys)
+ msvc7 | msvc7msys | msvisualcpp | msvcmsys)
# This compiler won't grok `-c -o', but also, the minuso test has
# not run yet. These depmodes are late enough in the game, and
# so weak that their functioning should not be impacted.
@@ -4514,7 +4523,7 @@
if test ! -z "$have_bg_files" ; then
BG_INCLUDES="$bg_includes"
- CFLAGS="$CFLAGS -m64"
+ CFLAGS="$CFLAGS -m64 --std=gnu99"
CXXFLAGS="$CXXFLAGS $CFLAGS"
$as_echo "#define HAVE_3D 1" >>confdefs.h
@@ -5018,6 +5027,7 @@
# instance it was reported that on HP-UX the gcc test will end up
# making a dummy file named `D' -- because `-MD' means `put the output
# in D'.
+ rm -rf conftest.dir
mkdir conftest.dir
# Copy depcomp to subdir because otherwise we won't find it if we're
# using a relative directory.
@@ -5077,7 +5087,7 @@
break
fi
;;
- msvisualcpp | msvcmsys)
+ msvc7 | msvc7msys | msvisualcpp | msvcmsys)
# This compiler won't grok `-c -o', but also, the minuso test has
# not run yet. These depmodes are late enough in the game, and
# so weak that their functioning should not be impacted.
@@ -5176,6 +5186,7 @@
libname=bgsched
loglibname=log4cxx
+ runjoblibname=runjob_client
for bg_dir in $trydb2dir "" $bg_default_dirs; do
# Skip directories that don't exist
@@ -5204,7 +5215,18 @@
fi
fi
- # Search for headers in the directory
+ soloc=$bg_dir/hlcs/lib/lib$runjoblibname.so
+ # Search for required BG API libraries in the directory
+ if test -z "$have_bg_ar" -a -f "$soloc" ; then
+ have_bgq_ar=yes
+ if test "$ac_with_rpath" = "yes"; then
+ runjob_ldflags="$runjob_ldflags -Wl,-rpath -Wl,$bg_dir/hlcs/lib -L$bg_dir/hlcs/lib -l$runjoblibname"
+ else
+ runjob_ldflags="$runjob_ldflags -L$bg_dir/hlcs/lib -l$runjoblibname"
+ fi
+ fi
+
+ # Search for headers in the directory
if test -z "$have_bg_hdr" -a -f "$bg_dir/hlcs/include/bgsched/bgsched.h" ; then
have_bgq_hdr=yes
bg_includes="-I$bg_dir/hlcs/include"
@@ -5246,6 +5268,52 @@
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
+ # In later versions of the driver IBM added a better function
+ # to see if blocks were IO connected or not. Here is a check
+ # to not break backwards compatibility
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <bgsched/bgsched.h>
+ #include <bgsched/Block.h>
+int
+main ()
+{
+ bgsched::Block::checkIO("", NULL, NULL);
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+ have_bgq_new_io_check=yes
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Using old iocheck." >&5
+$as_echo "Using old iocheck." >&6; }
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ # In later versions of the driver IBM added an "action" to a
+ # block. Here is a check to not break backwards compatibility
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <bgsched/bgsched.h>
+ #include <bgsched/Block.h>
+int
+main ()
+{
+ bgsched::Block::Ptr block_ptr;
+ block_ptr->getAction();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+ have_bgq_get_action=yes
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Blocks do not have actions!" >&5
+$as_echo "Blocks do not have actions!" >&6; }
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -5257,6 +5325,7 @@
if test ! -z "$have_bgq_files" ; then
BG_LDFLAGS="$bg_ldflags"
+ RUNJOB_LDFLAGS="$runjob_ldflags"
BG_INCLUDES="$bg_includes"
CFLAGS="$CFLAGS -m64"
CXXFLAGS="$CXXFLAGS $CFLAGS"
@@ -5279,6 +5348,17 @@
$as_echo "#define HAVE_BG_FILES 1" >>confdefs.h
#AC_DEFINE_UNQUOTED(BG_BRIDGE_SO, "$soloc", [Define the BG_BRIDGE_SO value])
+ if test ! -z "$have_bgq_new_io_check" ; then
+
+$as_echo "#define HAVE_BG_NEW_IO_CHECK 1" >>confdefs.h
+
+ fi
+
+ if test ! -z "$have_bgq_get_action" ; then
+
+$as_echo "#define HAVE_BG_GET_ACTION 1" >>confdefs.h
+
+ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: Running on a legitimate BG/Q system" >&5
$as_echo "$as_me: Running on a legitimate BG/Q system" >&6;}
@@ -5297,6 +5377,7 @@
+
if test "x$ac_bgq_loaded" = "xyes"; then
BGQ_LOADED_TRUE=
BGQ_LOADED_FALSE='#'
@@ -6033,7 +6114,7 @@
LDFLAGS="$LDFLAGS -L/usr/sfw/lib"
;;
esac
- if test x"$ac_have_cygwin" == x"yes"; then
+ if test x"$ac_have_cygwin" = x"yes"; then
WITH_CYGWIN_TRUE=
WITH_CYGWIN_FALSE='#'
else
@@ -6595,6 +6676,7 @@
# instance it was reported that on HP-UX the gcc test will end up
# making a dummy file named `D' -- because `-MD' means `put the output
# in D'.
+ rm -rf conftest.dir
mkdir conftest.dir
# Copy depcomp to subdir because otherwise we won't find it if we're
# using a relative directory.
@@ -6654,7 +6736,7 @@
break
fi
;;
- msvisualcpp | msvcmsys)
+ msvc7 | msvc7msys | msvisualcpp | msvcmsys)
# This compiler won't grok `-c -o', but also, the minuso test has
# not run yet. These depmodes are late enough in the game, and
# so weak that their functioning should not be impacted.
@@ -6979,6 +7061,7 @@
# instance it was reported that on HP-UX the gcc test will end up
# making a dummy file named `D' -- because `-MD' means `put the output
# in D'.
+ rm -rf conftest.dir
mkdir conftest.dir
# Copy depcomp to subdir because otherwise we won't find it if we're
# using a relative directory.
@@ -7038,7 +7121,7 @@
break
fi
;;
- msvisualcpp | msvcmsys)
+ msvc7 | msvc7msys | msvisualcpp | msvcmsys)
# This compiler won't grok `-c -o', but also, the minuso test has
# not run yet. These depmodes are late enough in the game, and
# so weak that their functioning should not be impacted.
@@ -7132,8 +7215,8 @@
-macro_version='2.4'
-macro_revision='1.3293'
+macro_version='2.4.2'
+macro_revision='1.3337'
@@ -7777,6 +7860,11 @@
lt_cv_sys_max_cmd_len=196608
;;
+ os2*)
+ # The test takes a long time on OS/2.
+ lt_cv_sys_max_cmd_len=8192
+ ;;
+
osf*)
# Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
# due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
@@ -7816,7 +7904,7 @@
# If test is not a shell built-in, we'll probably end up computing a
# maximum length that is only half of the actual maximum length, but
# we can't tell.
- while { test "X"`func_fallback_echo "$teststring$teststring" 2>/dev/null` \
+ while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
= "X$teststring$teststring"; } >/dev/null 2>&1 &&
test $i != 17 # 1/2 MB should be enough
do
@@ -8245,7 +8333,7 @@
lt_cv_deplibs_check_method=pass_all
;;
-# This must be Linux ELF.
+# This must be glibc/ELF.
linux* | k*bsd*-gnu | kopensolaris*-gnu)
lt_cv_deplibs_check_method=pass_all
;;
@@ -8885,13 +8973,13 @@
if test -n "$RANLIB"; then
case $host_os in
openbsd*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
;;
*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
;;
esac
- old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
+ old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
fi
case $host_os in
@@ -9038,6 +9126,7 @@
# which start with @ or ?.
lt_cv_sys_global_symbol_pipe="$AWK '"\
" {last_section=section; section=\$ 3};"\
+" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
" \$ 0!~/External *\|/{next};"\
" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
@@ -9426,7 +9515,7 @@
CFLAGS="$SAVE_CFLAGS"
fi
;;
-sparc*-*solaris*)
+*-*solaris*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
@@ -9437,7 +9526,20 @@
case `/usr/bin/file conftest.o` in
*64-bit*)
case $lt_cv_prog_gnu_ld in
- yes*) LD="${LD-ld} -m elf64_sparc" ;;
+ yes*)
+ case $host in
+ i?86-*-solaris*)
+ LD="${LD-ld} -m elf_x86_64"
+ ;;
+ sparc*-*-solaris*)
+ LD="${LD-ld} -m elf64_sparc"
+ ;;
+ esac
+ # GNU ld 2.21 introduced _sol2 emulations. Use them if available.
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+ LD="${LD-ld}_sol2"
+ fi
+ ;;
*)
if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
LD="${LD-ld} -64"
@@ -10077,7 +10179,13 @@
$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
-dynamiclib -Wl,-single_module conftest.c 2>conftest.err
_lt_result=$?
- if test -f libconftest.dylib && test ! -s conftest.err && test $_lt_result = 0; then
+ # If there is a non-empty error log, and "single_module"
+ # appears in it, assume the flag caused a linker warning
+ if test -s conftest.err && $GREP single_module conftest.err; then
+ cat conftest.err >&5
+ # Otherwise, if the output was created with a 0 exit code from
+ # the compiler, it worked.
+ elif test -f libconftest.dylib && test $_lt_result -eq 0; then
lt_cv_apple_cc_single_mod=yes
else
cat conftest.err >&5
@@ -10088,6 +10196,7 @@
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5
$as_echo "$lt_cv_apple_cc_single_mod" >&6; }
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5
$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; }
if ${lt_cv_ld_exported_symbols_list+:} false; then :
@@ -10120,6 +10229,7 @@
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5
$as_echo "$lt_cv_ld_exported_symbols_list" >&6; }
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5
$as_echo_n "checking for -force_load linker flag... " >&6; }
if ${lt_cv_ld_force_load+:} false; then :
@@ -10141,7 +10251,9 @@
echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5
$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
_lt_result=$?
- if test -f conftest && test ! -s conftest.err && test $_lt_result = 0 && $GREP forced_load conftest 2>&1 >/dev/null; then
+ if test -s conftest.err && $GREP force_load conftest.err; then
+ cat conftest.err >&5
+ elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
lt_cv_ld_force_load=yes
else
cat conftest.err >&5
@@ -10289,7 +10401,22 @@
# Check whether --with-pic was given.
if test "${with_pic+set}" = set; then :
- withval=$with_pic; pic_mode="$withval"
+ withval=$with_pic; lt_p=${PACKAGE-default}
+ case $withval in
+ yes|no) pic_mode=$withval ;;
+ *)
+ pic_mode=default
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for lt_pkg in $withval; do
+ IFS="$lt_save_ifs"
+ if test "X$lt_pkg" = "X$lt_p"; then
+ pic_mode=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac
else
pic_mode=default
fi
@@ -10367,6 +10494,10 @@
+
+
+
+
test -z "$LN_S" && LN_S="ln -s"
@@ -10826,7 +10957,9 @@
case $cc_basename in
nvcc*) # Cuda Compiler Driver 2.2
lt_prog_compiler_wl='-Xlinker '
- lt_prog_compiler_pic='-Xcompiler -fPIC'
+ if test -n "$lt_prog_compiler_pic"; then
+ lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic"
+ fi
;;
esac
else
@@ -10917,18 +11050,33 @@
;;
*)
case `$CC -V 2>&1 | sed 5q` in
- *Sun\ F* | *Sun*Fortran*)
+ *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*)
# Sun Fortran 8.3 passes all unrecognized flags to the linker
lt_prog_compiler_pic='-KPIC'
lt_prog_compiler_static='-Bstatic'
lt_prog_compiler_wl=''
;;
+ *Sun\ F* | *Sun*Fortran*)
+ lt_prog_compiler_pic='-KPIC'
+ lt_prog_compiler_static='-Bstatic'
+ lt_prog_compiler_wl='-Qoption ld '
+ ;;
*Sun\ C*)
# Sun C 5.9
lt_prog_compiler_pic='-KPIC'
lt_prog_compiler_static='-Bstatic'
lt_prog_compiler_wl='-Wl,'
;;
+ *Intel*\ [CF]*Compiler*)
+ lt_prog_compiler_wl='-Wl,'
+ lt_prog_compiler_pic='-fPIC'
+ lt_prog_compiler_static='-static'
+ ;;
+ *Portland\ Group*)
+ lt_prog_compiler_wl='-Wl,'
+ lt_prog_compiler_pic='-fpic'
+ lt_prog_compiler_static='-Bstatic'
+ ;;
esac
;;
esac
@@ -11290,7 +11438,6 @@
hardcode_direct=no
hardcode_direct_absolute=no
hardcode_libdir_flag_spec=
- hardcode_libdir_flag_spec_ld=
hardcode_libdir_separator=
hardcode_minus_L=no
hardcode_shlibpath_var=unsupported
@@ -11543,8 +11690,7 @@
xlf* | bgf* | bgxlf* | mpixlf*)
# IBM XL Fortran 10.1 on PPC cannot create shared libs itself
whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive'
- hardcode_libdir_flag_spec=
- hardcode_libdir_flag_spec_ld='-rpath $libdir'
+ hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
if test "x$supports_anon_versioning" = xyes; then
archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
@@ -11924,6 +12070,7 @@
# The linker will not automatically build a static lib if we build a DLL.
# _LT_TAGVAR(old_archive_from_new_cmds, )='true'
enable_shared_with_static_runtimes=yes
+ exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
# Don't use ranlib
old_postinstall_cmds='chmod 644 $oldlib'
@@ -11969,6 +12116,7 @@
hardcode_shlibpath_var=unsupported
if test "$lt_cv_ld_force_load" = "yes"; then
whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
else
whole_archive_flag_spec=''
fi
@@ -11997,10 +12145,6 @@
hardcode_shlibpath_var=no
;;
- freebsd1*)
- ld_shlibs=no
- ;;
-
# FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
# support. Future versions do this automatically, but an explicit c++rt0.o
# does not break anything, and helps significantly (at the cost of a little
@@ -12013,7 +12157,7 @@
;;
# Unfortunately, older versions of FreeBSD 2 do not have this feature.
- freebsd2*)
+ freebsd2.*)
archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
hardcode_direct=yes
hardcode_minus_L=yes
@@ -12052,7 +12196,6 @@
fi
if test "$with_gnu_ld" = no; then
hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
- hardcode_libdir_flag_spec_ld='+b $libdir'
hardcode_libdir_separator=:
hardcode_direct=yes
hardcode_direct_absolute=yes
@@ -12676,11 +12819,6 @@
-
-
-
-
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
$as_echo_n "checking dynamic linker characteristics... " >&6; }
@@ -12770,7 +12908,7 @@
case $host_os in
aix3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
shlibpath_var=LIBPATH
@@ -12779,7 +12917,7 @@
;;
aix[4-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
hardcode_into_libs=yes
@@ -12844,7 +12982,7 @@
;;
bsdi[45]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
@@ -12983,7 +13121,7 @@
;;
dgux*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -12991,10 +13129,6 @@
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
- dynamic_linker=no
- ;;
-
freebsd* | dragonfly*)
# DragonFly does not have aout. When/if they implement a new
# versioning mechanism, adjust this.
@@ -13002,7 +13136,7 @@
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[123]*) objformat=aout ;;
+ freebsd[23].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -13020,7 +13154,7 @@
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[01]* | freebsdelf3.[01]*)
@@ -13040,7 +13174,7 @@
;;
gnu*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
@@ -13051,7 +13185,7 @@
;;
haiku*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
dynamic_linker="$host_os runtime_loader"
@@ -13112,7 +13246,7 @@
;;
interix[3-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -13128,7 +13262,7 @@
nonstopux*) version_type=nonstopux ;;
*)
if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
else
version_type=irix
fi ;;
@@ -13165,9 +13299,9 @@
dynamic_linker=no
;;
-# This must be Linux ELF.
+# This must be glibc/ELF.
linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -13261,7 +13395,7 @@
;;
newsos6)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
shlibpath_var=LD_LIBRARY_PATH
shlibpath_overrides_runpath=yes
@@ -13330,7 +13464,7 @@
;;
solaris*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -13355,7 +13489,7 @@
;;
sysv4 | sysv4.3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -13379,7 +13513,7 @@
sysv4*MP*)
if test -d /usr/nec ;then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
soname_spec='$libname${shared_ext}.$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -13410,7 +13544,7 @@
tpf*)
# TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -13420,7 +13554,7 @@
;;
uts4*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -14345,7 +14479,6 @@
hardcode_direct_CXX=no
hardcode_direct_absolute_CXX=no
hardcode_libdir_flag_spec_CXX=
-hardcode_libdir_flag_spec_ld_CXX=
hardcode_libdir_separator_CXX=
hardcode_minus_L_CXX=no
hardcode_shlibpath_var_CXX=unsupported
@@ -14929,6 +15062,7 @@
hardcode_shlibpath_var_CXX=unsupported
if test "$lt_cv_ld_force_load" = "yes"; then
whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
else
whole_archive_flag_spec_CXX=''
fi
@@ -14973,7 +15107,7 @@
esac
;;
- freebsd[12]*)
+ freebsd2.*)
# C++ shared libraries reported to be fairly broken before
# switch to ELF
ld_shlibs_CXX=no
@@ -15649,6 +15783,7 @@
case "$CC $CFLAGS " in #(
*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
esac
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
@@ -16438,7 +16573,9 @@
;;
cygwin* | mingw* | cegcc*)
case $cc_basename in
- cl*) ;;
+ cl*)
+ exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+ ;;
*)
export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
@@ -16594,8 +16731,6 @@
-
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
$as_echo_n "checking dynamic linker characteristics... " >&6; }
@@ -16621,7 +16756,7 @@
case $host_os in
aix3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
shlibpath_var=LIBPATH
@@ -16630,7 +16765,7 @@
;;
aix[4-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
hardcode_into_libs=yes
@@ -16695,7 +16830,7 @@
;;
bsdi[45]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
@@ -16832,7 +16967,7 @@
;;
dgux*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -16840,10 +16975,6 @@
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
- dynamic_linker=no
- ;;
-
freebsd* | dragonfly*)
# DragonFly does not have aout. When/if they implement a new
# versioning mechanism, adjust this.
@@ -16851,7 +16982,7 @@
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[123]*) objformat=aout ;;
+ freebsd[23].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -16869,7 +17000,7 @@
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[01]* | freebsdelf3.[01]*)
@@ -16889,7 +17020,7 @@
;;
gnu*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
@@ -16900,7 +17031,7 @@
;;
haiku*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
dynamic_linker="$host_os runtime_loader"
@@ -16961,7 +17092,7 @@
;;
interix[3-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -16977,7 +17108,7 @@
nonstopux*) version_type=nonstopux ;;
*)
if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
else
version_type=irix
fi ;;
@@ -17014,9 +17145,9 @@
dynamic_linker=no
;;
-# This must be Linux ELF.
+# This must be glibc/ELF.
linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -17110,7 +17241,7 @@
;;
newsos6)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
shlibpath_var=LD_LIBRARY_PATH
shlibpath_overrides_runpath=yes
@@ -17179,7 +17310,7 @@
;;
solaris*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -17204,7 +17335,7 @@
;;
sysv4 | sysv4.3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -17228,7 +17359,7 @@
sysv4*MP*)
if test -d /usr/nec ;then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
soname_spec='$libname${shared_ext}.$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -17259,7 +17390,7 @@
tpf*)
# TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -17269,7 +17400,7 @@
;;
uts4*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -17408,6 +17539,8 @@
+
+
ac_config_commands="$ac_config_commands libtool"
@@ -17789,6 +17922,7 @@
pty.h utmp.h \
sys/syslog.h linux/sched.h \
kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h sys/termios.h \
+ llapi.h nrt.h \
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
@@ -19257,7 +19391,7 @@
main ()
{
pthread_t th; pthread_join(th, 0);
- pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0); pthread_atfork(NULL, NULL, NULL);
pthread_create(0,0,0,0); pthread_cleanup_pop(0);
;
return 0;
@@ -19477,6 +19611,8 @@
x_ac_dimensions=yes
fi
+else
+ x_ac_dimensions=no
fi
@@ -19484,11 +19620,16 @@
if test $dimensions -lt 1; then
as_fn_error $? "Invalid dimensions value $dimensions" "$LINENO" 5
fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $dimensions" >&5
+$as_echo "$dimensions" >&6; };
cat >>confdefs.h <<_ACEOF
#define SYSTEM_DIMENSIONS $dimensions
_ACEOF
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: not set" >&5
+$as_echo "not set" >&6; };
fi
@@ -19497,7 +19638,7 @@
# Without this flag there is a bug when pointing to other functions
# and then using them. It is also advised to set the flag if there
# are goto statements you may get better performance.
- if test "$GCC" == "yes"; then
+ if test "$GCC" = yes; then
CFLAGS="$CFLAGS -fno-gcse"
fi
@@ -19783,9 +19924,9 @@
fi
if test "$ac_have_some_curses" = "yes"; then
- save_LIBS="$LIBS"
- LIBS="$NCURSES $save_LIBS"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ save_LIBS="$LIBS"
+ LIBS="$NCURSES $save_LIBS"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <${NCURSES_HEADER}>
int
@@ -19803,14 +19944,14 @@
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
- LIBS="$save_LIBS"
- if test "$ac_have_some_curses" == "yes"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: NCURSES test program built properly." >&5
+ LIBS="$save_LIBS"
+ if test "$ac_have_some_curses" = "yes"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: NCURSES test program built properly." >&5
$as_echo "NCURSES test program built properly." >&6; }
- else
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: *** NCURSES test program execution failed." >&5
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: *** NCURSES test program execution failed." >&5
$as_echo "$as_me: WARNING: *** NCURSES test program execution failed." >&2;}
- fi
+ fi
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Can not build smap without curses or ncurses library" >&5
$as_echo "$as_me: WARNING: Can not build smap without curses or ncurses library" >&2;}
@@ -20277,7 +20418,7 @@
conftest$ac_exeext conftest.$ac_ext
CFLAGS="$save_CFLAGS"
LIBS="$save_LIBS"
- if test "$ac_have_mysql" == "yes"; then
+ if test "$ac_have_mysql" = yes; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: MySQL test program built properly." >&5
$as_echo "MySQL test program built properly." >&6; }
@@ -20320,7 +20461,7 @@
CFLAGS="$save_CFLAGS"
LIBS="$save_LIBS"
- if test "$ac_have_mysql" == "yes"; then
+ if test "$ac_have_mysql" = yes; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: MySQL (non-threaded) test program built properly." >&5
$as_echo "MySQL (non-threaded) test program built properly." >&6; }
@@ -20340,7 +20481,7 @@
fi
fi
fi
- if test x"$ac_have_mysql" == x"yes"; then
+ if test x"$ac_have_mysql" = x"yes"; then
WITH_MYSQL_TRUE=
WITH_MYSQL_FALSE='#'
else
@@ -20486,7 +20627,7 @@
conftest$ac_exeext conftest.$ac_ext
LIBS="$save_LIBS"
CFLAGS="$save_CFLAGS"
- if test "$ac_have_pgsql" == "yes"; then
+ if test "$ac_have_pgsql" = "yes"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: PostgreSQL test program built properly." >&5
$as_echo "PostgreSQL test program built properly." >&6; }
@@ -20501,7 +20642,7 @@
$as_echo "$as_me: WARNING: *** PostgreSQL test program execution failed." >&2;}
fi
fi
- if test x"$ac_have_pgsql" == x"yes"; then
+ if test x"$ac_have_pgsql" = x"yes"; then
WITH_PGSQL_TRUE=
WITH_PGSQL_FALSE='#'
else
@@ -21048,7 +21189,7 @@
-if test "x$prefix" == "xNONE" ; then
+if test "x$prefix" = "xNONE" ; then
cat >>confdefs.h <<_ACEOF
#define SLURM_PREFIX "/usr/local"
@@ -21555,7 +21696,7 @@
- if test "x$ac_have_man2html" == "xyes"; then
+ if test "x$ac_have_man2html" = "xyes"; then
HAVE_MAN2HTML_TRUE=
HAVE_MAN2HTML_FALSE='#'
else
@@ -21607,13 +21748,13 @@
*) have_solaris=no ;;
esac
- if test "$printf_null_ok" == "no" -a "$have_solaris" == "yes" -a -d /usr/lib64/0@0.so.1; then
+ if test "$printf_null_ok" = "no" -a "$have_solaris" = "yes" -a -d /usr/lib64/0@0.so.1; then
as_fn_error $? "printf(\"%s\", NULL) results in abort, upgrade to OpenSolaris release 119 or set LD_PRELOAD=/usr/lib64/0@0.so.1" "$LINENO" 5
- elif test "$printf_null_ok" == "no" -a "$have_solaris" == "yes" -a -d /usr/lib/0@0.so.1; then
+ elif test "$printf_null_ok" = "no" -a "$have_solaris" = "yes" -a -d /usr/lib/0@0.so.1; then
as_fn_error $? "printf(\"%s\", NULL) results in abort, upgrade to OpenSolaris release 119 or set LD_PRELOAD=/usr/lib/0@0.so.1" "$LINENO" 5
- elif test "$printf_null_ok" == "no" -a "$have_solaris" == "yes"; then
+ elif test "$printf_null_ok" = "no" -a "$have_solaris" = "yes"; then
as_fn_error $? "printf(\"%s\", NULL) results in abort, upgrade to OpenSolaris release 119" "$LINENO" 5
- elif test "$printf_null_ok" == "no"; then
+ elif test "$printf_null_ok" = "no"; then
as_fn_error $? "printf(\"%s\", NULL) results in abort" "$LINENO" 5
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
@@ -21933,6 +22074,7 @@
else
MUNGE_LIBS="-lmunge"
MUNGE_CPPFLAGS="-I$x_ac_cv_munge_dir/include"
+ MUNGE_DIR="$x_ac_cv_munge_dir"
if test "$ac_with_rpath" = "yes"; then
MUNGE_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_munge_dir/$bit -L$x_ac_cv_munge_dir/$bit"
else
@@ -21944,6 +22086,7 @@
+
if test -n "$x_ac_cv_munge_dir"; then
WITH_MUNGE_TRUE=
WITH_MUNGE_FALSE='#'
@@ -22212,8 +22355,34 @@
+ac_build_smap="no"
+if test "x$ac_have_some_curses" = "xyes" ; then
+ ac_build_smap="yes"
+fi
+ if test "x$ac_build_smap" = "xyes"; then
+ BUILD_SMAP_TRUE=
+ BUILD_SMAP_FALSE='#'
+else
+ BUILD_SMAP_TRUE='#'
+ BUILD_SMAP_FALSE=
+fi
-ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/arrayrun/Makefile contribs/cray/Makefile contribs/lua/Makefile contribs/pam/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm/Makefile contribs/perlapi/libslurm/perl/Makefile.PL contribs/perlapi/libslurmdb/Makefile contribs/perlapi/libslurmdb/perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 contribs/sjobexit/Makefile contribs/slurmdb-direct/Makefile src/Makefile src/api/Makefile src/common/Makefile src/db_api/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/sshare/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/sprio/Makefile src/srun/Makefile src/srun_cr/Makefile src/slurmd/Makefile src/slurmd/common/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/common/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/blcr/Makefile src/plugins/checkpoint/blcr/cr_checkpoint.sh src/plugins/checkpoint/blcr/cr_restart.sh src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/gres/Makefile 
src/plugins/gres/gpu/Makefile src/plugins/gres/nic/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/job_submit/Makefile src/plugins/job_submit/cnode/Makefile src/plugins/job_submit/defaults/Makefile src/plugins/job_submit/logging/Makefile src/plugins/job_submit/lua/Makefile src/plugins/job_submit/partition/Makefile src/plugins/preempt/Makefile src/plugins/preempt/none/Makefile src/plugins/preempt/partition_prio/Makefile src/plugins/preempt/qos/Makefile src/plugins/priority/Makefile src/plugins/priority/basic/Makefile src/plugins/priority/multifactor/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/cgroup/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/proctrack/lua/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/ba/Makefile src/plugins/select/bluegene/ba_bgq/Makefile src/plugins/select/bluegene/bl/Makefile src/plugins/select/bluegene/bl_bgq/Makefile src/plugins/select/bluegene/sfree/Makefile src/plugins/select/cons_res/Makefile src/plugins/select/cray/Makefile src/plugins/select/cray/libalps/Makefile src/plugins/select/cray/libemulate/Makefile src/plugins/select/linear/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile 
src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/cgroup/Makefile src/plugins/task/none/Makefile src/plugins/topology/Makefile src/plugins/topology/3d_torus/Makefile src/plugins/topology/node_rank/Makefile src/plugins/topology/none/Makefile src/plugins/topology/tree/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile testsuite/slurm_unit/common/Makefile"
+
+ac_build_sview="no"
+if test "x$ac_have_gtk" = "xyes" ; then
+ ac_build_sview="yes"
+fi
+ if test "x$ac_build_sview" = "xyes"; then
+ BUILD_SVIEW_TRUE=
+ BUILD_SVIEW_FALSE='#'
+else
+ BUILD_SVIEW_TRUE='#'
+ BUILD_SVIEW_FALSE=
+fi
+
+
+
+ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/arrayrun/Makefile contribs/cray/Makefile contribs/cray/opt_modulefiles_slurm contribs/lua/Makefile contribs/pam/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm/Makefile contribs/perlapi/libslurm/perl/Makefile.PL contribs/perlapi/libslurmdb/Makefile contribs/perlapi/libslurmdb/perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 contribs/sjobexit/Makefile contribs/slurmdb-direct/Makefile doc/Makefile doc/man/Makefile doc/man/man1/Makefile doc/man/man3/Makefile doc/man/man5/Makefile doc/man/man8/Makefile doc/html/Makefile doc/html/configurator.html doc/html/configurator.easy.html etc/init.d.slurm etc/init.d.slurmdbd src/Makefile src/api/Makefile src/common/Makefile src/db_api/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/sshare/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/sdiag/Makefile src/sprio/Makefile src/srun/Makefile src/srun_cr/Makefile src/slurmd/Makefile src/slurmd/common/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/common/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile 
src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/blcr/Makefile src/plugins/checkpoint/blcr/cr_checkpoint.sh src/plugins/checkpoint/blcr/cr_restart.sh src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/gres/Makefile src/plugins/gres/gpu/Makefile src/plugins/gres/nic/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/cgroup/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/job_submit/Makefile src/plugins/job_submit/cnode/Makefile src/plugins/job_submit/defaults/Makefile src/plugins/job_submit/logging/Makefile src/plugins/job_submit/lua/Makefile src/plugins/job_submit/partition/Makefile src/plugins/preempt/Makefile src/plugins/preempt/none/Makefile src/plugins/preempt/partition_prio/Makefile src/plugins/preempt/qos/Makefile src/plugins/priority/Makefile src/plugins/priority/basic/Makefile src/plugins/priority/multifactor/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/cgroup/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/proctrack/lua/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/ba/Makefile src/plugins/select/bluegene/ba_bgq/Makefile src/plugins/select/bluegene/bl/Makefile src/plugins/select/bluegene/bl_bgq/Makefile src/plugins/select/bluegene/sfree/Makefile 
src/plugins/select/cons_res/Makefile src/plugins/select/cray/Makefile src/plugins/select/cray/libalps/Makefile src/plugins/select/cray/libemulate/Makefile src/plugins/select/linear/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/mpi/pmi2/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/cgroup/Makefile src/plugins/task/none/Makefile src/plugins/topology/Makefile src/plugins/topology/3d_torus/Makefile src/plugins/topology/node_rank/Makefile src/plugins/topology/none/Makefile src/plugins/topology/tree/Makefile testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile testsuite/slurm_unit/common/Makefile"
cat >confcache <<\_ACEOF
@@ -22494,6 +22663,14 @@
as_fn_error $? "conditional \"BUILD_SRUN2APRUN\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${BUILD_SMAP_TRUE}" && test -z "${BUILD_SMAP_FALSE}"; then
+ as_fn_error $? "conditional \"BUILD_SMAP\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${BUILD_SVIEW_TRUE}" && test -z "${BUILD_SVIEW_FALSE}"; then
+ as_fn_error $? "conditional \"BUILD_SVIEW\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
: "${CONFIG_STATUS=./config.status}"
ac_write_fail=0
@@ -23106,6 +23283,7 @@
enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`'
ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`'
+PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`'
host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`'
host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`'
host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`'
@@ -23188,7 +23366,6 @@
allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`'
no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_flag_spec_ld='`$ECHO "$hardcode_libdir_flag_spec_ld" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`'
hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`'
hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`'
@@ -23260,7 +23437,6 @@
allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_flag_spec_ld_CXX='`$ECHO "$hardcode_libdir_flag_spec_ld_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`'
@@ -23299,6 +23475,7 @@
# Quote evaled strings.
for var in SHELL \
ECHO \
+PATH_SEPARATOR \
SED \
GREP \
EGREP \
@@ -23349,7 +23526,6 @@
allow_undefined_flag \
no_undefined_flag \
hardcode_libdir_flag_spec \
-hardcode_libdir_flag_spec_ld \
hardcode_libdir_separator \
exclude_expsyms \
include_expsyms \
@@ -23383,7 +23559,6 @@
allow_undefined_flag_CXX \
no_undefined_flag_CXX \
hardcode_libdir_flag_spec_CXX \
-hardcode_libdir_flag_spec_ld_CXX \
hardcode_libdir_separator_CXX \
exclude_expsyms_CXX \
include_expsyms_CXX \
@@ -23485,6 +23660,7 @@
"contribs/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/Makefile" ;;
"contribs/arrayrun/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/arrayrun/Makefile" ;;
"contribs/cray/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/cray/Makefile" ;;
+ "contribs/cray/opt_modulefiles_slurm") CONFIG_FILES="$CONFIG_FILES contribs/cray/opt_modulefiles_slurm" ;;
"contribs/lua/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/lua/Makefile" ;;
"contribs/pam/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/pam/Makefile" ;;
"contribs/perlapi/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/perlapi/Makefile" ;;
@@ -23497,6 +23673,17 @@
"contribs/phpext/slurm_php/config.m4") CONFIG_FILES="$CONFIG_FILES contribs/phpext/slurm_php/config.m4" ;;
"contribs/sjobexit/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/sjobexit/Makefile" ;;
"contribs/slurmdb-direct/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/slurmdb-direct/Makefile" ;;
+ "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
+ "doc/man/Makefile") CONFIG_FILES="$CONFIG_FILES doc/man/Makefile" ;;
+ "doc/man/man1/Makefile") CONFIG_FILES="$CONFIG_FILES doc/man/man1/Makefile" ;;
+ "doc/man/man3/Makefile") CONFIG_FILES="$CONFIG_FILES doc/man/man3/Makefile" ;;
+ "doc/man/man5/Makefile") CONFIG_FILES="$CONFIG_FILES doc/man/man5/Makefile" ;;
+ "doc/man/man8/Makefile") CONFIG_FILES="$CONFIG_FILES doc/man/man8/Makefile" ;;
+ "doc/html/Makefile") CONFIG_FILES="$CONFIG_FILES doc/html/Makefile" ;;
+ "doc/html/configurator.html") CONFIG_FILES="$CONFIG_FILES doc/html/configurator.html" ;;
+ "doc/html/configurator.easy.html") CONFIG_FILES="$CONFIG_FILES doc/html/configurator.easy.html" ;;
+ "etc/init.d.slurm") CONFIG_FILES="$CONFIG_FILES etc/init.d.slurm" ;;
+ "etc/init.d.slurmdbd") CONFIG_FILES="$CONFIG_FILES etc/init.d.slurmdbd" ;;
"src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
"src/api/Makefile") CONFIG_FILES="$CONFIG_FILES src/api/Makefile" ;;
"src/common/Makefile") CONFIG_FILES="$CONFIG_FILES src/common/Makefile" ;;
@@ -23510,6 +23697,7 @@
"src/salloc/Makefile") CONFIG_FILES="$CONFIG_FILES src/salloc/Makefile" ;;
"src/sbatch/Makefile") CONFIG_FILES="$CONFIG_FILES src/sbatch/Makefile" ;;
"src/sattach/Makefile") CONFIG_FILES="$CONFIG_FILES src/sattach/Makefile" ;;
+ "src/sdiag/Makefile") CONFIG_FILES="$CONFIG_FILES src/sdiag/Makefile" ;;
"src/sprio/Makefile") CONFIG_FILES="$CONFIG_FILES src/sprio/Makefile" ;;
"src/srun/Makefile") CONFIG_FILES="$CONFIG_FILES src/srun/Makefile" ;;
"src/srun_cr/Makefile") CONFIG_FILES="$CONFIG_FILES src/srun_cr/Makefile" ;;
@@ -23555,6 +23743,7 @@
"src/plugins/jobacct_gather/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobacct_gather/Makefile" ;;
"src/plugins/jobacct_gather/linux/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobacct_gather/linux/Makefile" ;;
"src/plugins/jobacct_gather/aix/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobacct_gather/aix/Makefile" ;;
+ "src/plugins/jobacct_gather/cgroup/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobacct_gather/cgroup/Makefile" ;;
"src/plugins/jobacct_gather/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobacct_gather/none/Makefile" ;;
"src/plugins/jobcomp/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/Makefile" ;;
"src/plugins/jobcomp/filetxt/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/filetxt/Makefile" ;;
@@ -23614,6 +23803,7 @@
"src/plugins/mpi/lam/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/mpi/lam/Makefile" ;;
"src/plugins/mpi/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/mpi/none/Makefile" ;;
"src/plugins/mpi/openmpi/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/mpi/openmpi/Makefile" ;;
+ "src/plugins/mpi/pmi2/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/mpi/pmi2/Makefile" ;;
"src/plugins/task/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/task/Makefile" ;;
"src/plugins/task/affinity/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/task/affinity/Makefile" ;;
"src/plugins/task/cgroup/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/task/cgroup/Makefile" ;;
@@ -23623,10 +23813,6 @@
"src/plugins/topology/node_rank/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/topology/node_rank/Makefile" ;;
"src/plugins/topology/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/topology/none/Makefile" ;;
"src/plugins/topology/tree/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/topology/tree/Makefile" ;;
- "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
- "doc/man/Makefile") CONFIG_FILES="$CONFIG_FILES doc/man/Makefile" ;;
- "doc/html/Makefile") CONFIG_FILES="$CONFIG_FILES doc/html/Makefile" ;;
- "doc/html/configurator.html") CONFIG_FILES="$CONFIG_FILES doc/html/configurator.html" ;;
"testsuite/Makefile") CONFIG_FILES="$CONFIG_FILES testsuite/Makefile" ;;
"testsuite/expect/Makefile") CONFIG_FILES="$CONFIG_FILES testsuite/expect/Makefile" ;;
"testsuite/slurm_unit/Makefile") CONFIG_FILES="$CONFIG_FILES testsuite/slurm_unit/Makefile" ;;
@@ -24344,8 +24530,8 @@
# NOTE: Changes made to this file will be lost: look at ltmain.sh.
#
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-# Inc.
+# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is part of GNU Libtool.
@@ -24399,6 +24585,9 @@
# An echo program that protects backslashes.
ECHO=$lt_ECHO
+# The PATH separator for the build system.
+PATH_SEPARATOR=$lt_PATH_SEPARATOR
+
# The host system.
host_alias=$host_alias
host=$host
@@ -24700,10 +24889,6 @@
# This must work even if \$libdir does not exist
hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
-# If ld is used when linking, flag to hardcode \$libdir into a binary
-# during linking. This must work even if \$libdir does not exist.
-hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld
-
# Whether we need a single "-rpath" flag with a separated argument.
hardcode_libdir_separator=$lt_hardcode_libdir_separator
@@ -25046,10 +25231,6 @@
# This must work even if \$libdir does not exist
hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX
-# If ld is used when linking, flag to hardcode \$libdir into a binary
-# during linking. This must work even if \$libdir does not exist.
-hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld_CXX
-
# Whether we need a single "-rpath" flag with a separated argument.
hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX
diff --git a/configure.ac b/configure.ac
index 952e3f9..95c992f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -86,7 +86,7 @@
LDFLAGS="$LDFLAGS -L/usr/sfw/lib"
;;
esac
-AM_CONDITIONAL(WITH_CYGWIN, test x"$ac_have_cygwin" == x"yes")
+AM_CONDITIONAL(WITH_CYGWIN, test x"$ac_have_cygwin" = x"yes")
dnl Checks for programs.
dnl
@@ -118,6 +118,7 @@
pty.h utmp.h \
sys/syslog.h linux/sched.h \
kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h sys/termios.h \
+ llapi.h nrt.h \
)
AC_HEADER_SYS_WAIT
AC_HEADER_TIME
@@ -274,7 +275,7 @@
dnl add SLURM_PREFIX to config.h
dnl
-if test "x$prefix" == "xNONE" ; then
+if test "x$prefix" = "xNONE" ; then
AC_DEFINE_UNQUOTED(SLURM_PREFIX, "/usr/local", [Define Slurm installation prefix])
else
AC_DEFINE_UNQUOTED(SLURM_PREFIX, "$prefix", [Define Slurm installation prefix])
@@ -379,6 +380,21 @@
dnl
X_AC_SRUN2APRUN
+dnl
+dnl Set some configuration based upon multiple configuration parameters
+dnl
+ac_build_smap="no"
+if test "x$ac_have_some_curses" = "xyes" ; then
+ ac_build_smap="yes"
+fi
+AM_CONDITIONAL(BUILD_SMAP, test "x$ac_build_smap" = "xyes")
+
+ac_build_sview="no"
+if test "x$ac_have_gtk" = "xyes" ; then
+ ac_build_sview="yes"
+fi
+AM_CONDITIONAL(BUILD_SVIEW, test "x$ac_build_sview" = "xyes")
+
dnl All slurm Makefiles:
AC_CONFIG_FILES([Makefile
@@ -387,6 +403,7 @@
contribs/Makefile
contribs/arrayrun/Makefile
contribs/cray/Makefile
+ contribs/cray/opt_modulefiles_slurm
contribs/lua/Makefile
contribs/pam/Makefile
contribs/perlapi/Makefile
@@ -399,6 +416,17 @@
contribs/phpext/slurm_php/config.m4
contribs/sjobexit/Makefile
contribs/slurmdb-direct/Makefile
+ doc/Makefile
+ doc/man/Makefile
+ doc/man/man1/Makefile
+ doc/man/man3/Makefile
+ doc/man/man5/Makefile
+ doc/man/man8/Makefile
+ doc/html/Makefile
+ doc/html/configurator.html
+ doc/html/configurator.easy.html
+ etc/init.d.slurm
+ etc/init.d.slurmdbd
src/Makefile
src/api/Makefile
src/common/Makefile
@@ -412,6 +440,7 @@
src/salloc/Makefile
src/sbatch/Makefile
src/sattach/Makefile
+ src/sdiag/Makefile
src/sprio/Makefile
src/srun/Makefile
src/srun_cr/Makefile
@@ -457,6 +486,7 @@
src/plugins/jobacct_gather/Makefile
src/plugins/jobacct_gather/linux/Makefile
src/plugins/jobacct_gather/aix/Makefile
+ src/plugins/jobacct_gather/cgroup/Makefile
src/plugins/jobacct_gather/none/Makefile
src/plugins/jobcomp/Makefile
src/plugins/jobcomp/filetxt/Makefile
@@ -516,6 +546,7 @@
src/plugins/mpi/lam/Makefile
src/plugins/mpi/none/Makefile
src/plugins/mpi/openmpi/Makefile
+ src/plugins/mpi/pmi2/Makefile
src/plugins/task/Makefile
src/plugins/task/affinity/Makefile
src/plugins/task/cgroup/Makefile
@@ -525,10 +556,6 @@
src/plugins/topology/node_rank/Makefile
src/plugins/topology/none/Makefile
src/plugins/topology/tree/Makefile
- doc/Makefile
- doc/man/Makefile
- doc/html/Makefile
- doc/html/configurator.html
testsuite/Makefile
testsuite/expect/Makefile
testsuite/slurm_unit/Makefile
diff --git a/contribs/Makefile.in b/contribs/Makefile.in
index 419eea2..6c9f9d0 100644
--- a/contribs/Makefile.in
+++ b/contribs/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -201,6 +201,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -237,6 +238,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -584,10 +586,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/contribs/README b/contribs/README
index 57bd818..6921c7e 100644
--- a/contribs/README
+++ b/contribs/README
@@ -1,4 +1,4 @@
-This is the contribs dir for SLURM.
+This is the contribs dir for SLURM.
SOURCE DISTRIBUTION HIERARCHY
-----------------------------
@@ -11,46 +11,46 @@
README - Description of the arrayrun tool and its use
arrayrun - Command used to submit job arrays
arrayrun_worker - Back-end to the arrayrun command responsible for
- spawning the jobs in the array
+ spawning the jobs in the array
cray [Tools for use on Cray systems]
etc_init_d_munge - /etc/init.d/munge script for use with Munge
etc_sysconfig_slurm - /etc/sysconfig/slurm for Cray XT/XE systems
libalps_test_programs.tar.gz - set of tools to verify ALPS/BASIL support
- logic. Note that this currently requires:
- * hardcoding in libsdb/basil_mysql_routines.c:
- mysql_real_connect(handle, "localhost", NULL, NULL, "XT5istanbul"
- * suitable /etc/my.cnf, containing at least the lines
- [client]
- user=basic
- password=basic
- * setting the APBASIL in the libalps/Makefile, e.g.
- APBASIL := slurm/alps_simulator/apbasil.sh
- To use, extract the files then:
- > cd libasil/
- > make -C alps_tests all # runs basil parser tests
- > make -C sdb_tests all # checks if database routines work
- A tool named tuxadmin is also also included. When
- executed with the -s or --slurm.conf option, this
- contact the SDB to generate system-specific information
- needed in slurm.conf (e.g. "NodeName=nid..." and
- "PartitionName= Nodes=nid... MaxNodes=...".
+ logic. Note that this currently requires:
+ * hardcoding in libsdb/basil_mysql_routines.c:
+ mysql_real_connect(handle, "localhost", NULL, NULL, "XT5istanbul"
+ * suitable /etc/my.cnf, containing at least the lines
+ [client]
+ user=basic
+ password=basic
+ * setting the APBASIL in the libalps/Makefile, e.g.
+ APBASIL := slurm/alps_simulator/apbasil.sh
+ To use, extract the files then:
+ > cd libasil/
+ > make -C alps_tests all # runs basil parser tests
+ > make -C sdb_tests all # checks if database routines work
+ A tool named tuxadmin is also included. When
+ executed with the -s or --slurm.conf option, this
+ contacts the SDB to generate system-specific information
+ needed in slurm.conf (e.g. "NodeName=nid..." and
+ "PartitionName= Nodes=nid... MaxNodes=...".
munge_build_script.sh - script to build Munge from sources for Cray system
opt_modulefiles_slurm - enables use of Munge as soon as built
slurm-build-script.sh - script to build SLURM from sources for Cray system.
- set LIBROOT and SLURM_SRC environment variables
- before use, for example:
- LIBROOT=/ufs/slurm/build
- SLURM_SRC=${SLURM_SRC:-${LIBROOT}/slurm-2.3.0-0.pre4}
+ set LIBROOT and SLURM_SRC environment variables
+ before use, for example:
+ LIBROOT=/ufs/slurm/build
+ SLURM_SRC=${SLURM_SRC:-${LIBROOT}/slurm-2.3.0-0.pre4}
srun.pl - A perl wrapper for the aprun command. Use of this
- wrapper requires that SLURM's perlapi be installed.
- Execute configure with the --with-srun2aprun option
- to build and install this instead of SLURM's normal
- srun command.
+ wrapper requires that SLURM's perlapi be installed.
+ Execute configure with the --with-srun2aprun option
+ to build and install this instead of SLURM's normal
+ srun command.
env_cache_builder.c [C program]
- This program will build an environment variable cache file for specific
- users or all users on the system. This can be used to prevent the aborting
+ This program will build an environment variable cache file for specific
+ users or all users on the system. This can be used to prevent the aborting
of jobs submitted by Moab using the srun/sbatch --get-user-env option.
Build with "make -f /dev/null env_cache_builder" and execute as user root
on the node where the moab daemon runs.
@@ -58,11 +58,11 @@
lua [ LUA scripts ]
Example LUA scripts that can serve as SLURM plugins.
job_submit.lua - job_submit plugin that can set a job's default partition
- using a very simple algorithm
+ using a very simple algorithm
job_submit_license.lua - job_submit plugin that can set a job's use of
- system licenses
+ system licenses
proctrack.lua - proctrack (process tracking) plugin that implements a
- very simple job step container using CPUSETs
+ very simple job step container using CPUSETs
make.slurm.patch [ Patch to "make" command for parallel build ]
This patch will use SLURM to launch tasks across a job's current resource
@@ -74,10 +74,10 @@
will be unchanged. Designed for GNU make-3.81.
mpich1.slurm.patch [ Patch to mpich1/p4 library for SLURM job task launch ]
- For SLURM based job initiations (from srun command), get the parameters
- from environment variables as needed. This allows for a truly parallel
- job launch using the existing "execer" mode of operation with slight
- modification.
+ For SLURM based job initiations (from srun command), get the parameters
+ from environment variables as needed. This allows for a truly parallel
+ job launch using the existing "execer" mode of operation with slight
+ modification.
pam [ PAM (Pluggable Authentication Module) for SLURM ]
This PAM module will restrict who can login to a node to users who have
@@ -87,12 +87,16 @@
API to SLURM using perl. Making available all SLURM command that exist
in the SLURM proper API.
+ phpext [ PHP API to SLURM source ]
+ API to SLURM using php. Not a complete API, but offers quite a few
+ interfaces to existing SLURM proper APIs.
+
ptrace.patch [ Linux Kernel patch required to for TotalView use ]
0. This has been fixed on most recent Linux kernels. Older versions of
Linux may need this patch support TotalView.
- 1. gdb and other tools cannot attach to a stopped process. The wait that
+ 1. gdb and other tools cannot attach to a stopped process. The wait that
follows the PTRACE_ATTACH will block indefinitely.
- 2. It is not possible to use PTRACE_DETACH to leave a process stopped,
+ 2. It is not possible to use PTRACE_DETACH to leave a process stopped,
because ptrace ignores SIGSTOPs sent by the tracing process.
sjobexit/ [ Perl programs ]
@@ -104,8 +108,8 @@
skilling.c [ C program ]
This program can be used to order the hostnames in a 2+ dimensional
architecture for use in the slurm.conf file. It is used to generate
- the Hilbert number based upon a node's physical location in the
- computer. Nodes close together in their Hilbert number will also be
+ the Hilbert number based upon a node's physical location in the
+ computer. Nodes close together in their Hilbert number will also be
physically close in 2-D or 3-D space, so we can reduce the 2-D or 3-D
job placement problem to a 1-D problem that SLURM can easily handle
by defining the node names in the slurm.conf file in order of their
@@ -113,6 +117,9 @@
power of two size, then collapse the node list maintaining the numeric
order based upon the Hilbert number.
+ slurm_completion_help [shell script, vim file]
+ Scripts to help in option completion when using slurm commands.
+
slurmdb-direct [ Perl program ]
Program that permits writing directly to SlurmDBD (SLURM DataBase Daemon).
@@ -122,10 +129,10 @@
time_login.c [ C program ]
This program will report how long a pseudo-login will take for specific
- users or all users on the system. Users identified by this program
+ users or all users on the system. Users identified by this program
will not have their environment properly set for jobs submitted through
- Moab. Build with "make -f /dev/null time_login" and execute as user root.
+ Moab. Build with "make -f /dev/null time_login" and execute as user root.
torque/ [ Wrapper Scripts for Torque migration to SLURM ]
- Helpful scripts to make transition to SLURM easier from PBS or Torque.
+ Helpful scripts to make transition to SLURM easier from PBS or Torque.
These scripts are easily updatable if there is functionality missing.
diff --git a/contribs/arrayrun/Makefile.in b/contribs/arrayrun/Makefile.in
index 5ff4342..34d3722 100644
--- a/contribs/arrayrun/Makefile.in
+++ b/contribs/arrayrun/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -161,6 +161,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -197,6 +198,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -379,10 +381,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/contribs/cray/Makefile.in b/contribs/cray/Makefile.in
index f7117ad..e9f2a97 100644
--- a/contribs/cray/Makefile.in
+++ b/contribs/cray/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -40,7 +40,8 @@
host_triplet = @host@
target_triplet = @target@
subdir = contribs/cray
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+ $(srcdir)/opt_modulefiles_slurm.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
$(top_srcdir)/auxdir/libtool.m4 \
@@ -84,7 +85,7 @@
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
-CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_FILES = opt_modulefiles_slurm
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
@@ -107,6 +108,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(bindir)"
SCRIPTS = $(bin_SCRIPTS)
SOURCES =
@@ -189,6 +196,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -225,6 +233,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -357,6 +366,8 @@
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
+opt_modulefiles_slurm: $(top_builddir)/config.status $(srcdir)/opt_modulefiles_slurm.in
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
mostlyclean-libtool:
-rm -f *.lo
@@ -417,10 +428,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/contribs/cray/opt_modulefiles_slurm.in b/contribs/cray/opt_modulefiles_slurm.in
new file mode 100644
index 0000000..a4ba766
--- /dev/null
+++ b/contribs/cray/opt_modulefiles_slurm.in
@@ -0,0 +1,49 @@
+#%Module1.0#####################################################################
+# slurm/munge support module
+# Put into /opt/modulefiles/slurm or some other part of $MODULEPATH
+################################################################################
+
+# SUBROUTINES
+proc ModulesHelp { } {
+ puts stderr "\tThis is slurm $::version.\n"
+ puts stderr "\tPlease consult http://www.schedmd.com/slurmdocs/cray.html"
+}
+
+# CONFIGURATION
+conflict xt-pbs pbs torque
+set slurmdir "@prefix@"
+set mungedir "@MUNGE_DIR@"
+set perldir [exec perl -e "use Config; \$T=\$Config{installsitearch}; \$P=\$Config{installprefix}; \$P1=\"\$P/local\"; \$T =~ s/\$P1//; \$T =~ s/\$P//; print \$T;"]
+
+set version "UNKNOWN"
+if {![catch {exec $slurmdir/bin/sbatch --version} out]} {
+ set version [lindex $out 1]
+}
+set helptext "Support for the SLURM $version resource allocation system"
+
+# SCRIPT PROPER
+module-whatis $helptext
+
+prepend-path PATH "$slurmdir/bin"
+prepend-path PATH "$mungedir/bin"
+
+prepend-path MANPATH "$slurmdir/share/man"
+prepend-path MANPATH "$mungedir/share/man"
+
+prepend-path PERL5LIB "$slurmdir/$perldir"
+
+# other useful environment variables
+setenv SINFO_FORMAT {%9P %5a %8s %.10l %.6c %.6z %.7D %10T %N}
+setenv SQUEUE_FORMAT {%.6i %.8u %.7a %.14j %.3t %9r %19S %.10M %.10L %.5D %.4C}
+setenv SQUEUE_ALL {yes} ;# show hidden partitions, too
+setenv SQUEUE_SORT {-t,e,S}
+
+# logfile aliases
+set-alias sd_log {tail -f "/ufs/slurm/var/log/slurmd.log"}
+set-alias sc_log {tail -f "/ufs/slurm/var/log/slurmctld.log"}
+
+if {[exec id -u] == 0} {
+ prepend-path PATH "$slurmdir/sbin"
+ prepend-path PATH "$mungedir/sbin"
+ set-alias sdown {scontrol shutdown}
+}
diff --git a/contribs/lua/Makefile.in b/contribs/lua/Makefile.in
index f605105..9d13fd0 100644
--- a/contribs/lua/Makefile.in
+++ b/contribs/lua/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -161,6 +161,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -197,6 +198,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -379,10 +381,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/contribs/lua/job_submit.license.lua b/contribs/lua/job_submit.license.lua
index db2fcf5..98078f4 100644
--- a/contribs/lua/job_submit.license.lua
+++ b/contribs/lua/job_submit.license.lua
@@ -3,7 +3,7 @@
Example lua script demonstrating the SLURM job_submit/lua interface.
This is only an example, not meant for use in its current form.
- For use, this script should be copied into a file name job_"submit.lua"
+ For use, this script should be copied into a file name "job_submit.lua"
in the same directory as the SLURM configuration file, slurm.conf.
--]]
@@ -31,7 +31,7 @@
--
--########################################################################--
-function slurm_job_submit ( job_desc, part_list )
+function slurm_job_submit ( job_desc, part_list, submit_uid )
setmetatable (job_desc, job_req_meta)
local bad_license_count = 0
@@ -48,7 +48,7 @@
return 0
end
-function slurm_job_modify ( job_desc, job_rec, part_list )
+function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid )
setmetatable (job_desc, job_req_meta)
setmetatable (job_rec, job_rec_meta)
local bad_license_count = 0
diff --git a/contribs/lua/job_submit.lua b/contribs/lua/job_submit.lua
index 4853227..e1ecbef 100644
--- a/contribs/lua/job_submit.lua
+++ b/contribs/lua/job_submit.lua
@@ -7,7 +7,7 @@
set up logic in each function unchanged. Change only the logic after
the line containing "*** YOUR LOGIC GOES BELOW ***".
- For use, this script should be copied into a file name job_"submit.lua"
+ For use, this script should be copied into a file name "job_submit.lua"
in the same directory as the SLURM configuration file, slurm.conf.
--]]
@@ -27,7 +27,7 @@
--
--########################################################################--
-function slurm_job_submit ( job_desc, part_list )
+function slurm_job_submit ( job_desc, part_list, submit_uid )
setmetatable (job_desc, job_req_meta)
local part_rec = _build_part_table (part_list)
@@ -35,7 +35,7 @@
if job_desc.account == nil then
local account = "***TEST_ACCOUNT***"
log_info("slurm_job_submit: job from uid %d, setting default account value: %s",
- job_desc.user_id, account)
+ submit_uid, account)
job_desc.account = account
end
-- If no default partition, set the partition to the highest
@@ -66,7 +66,7 @@
return 0
end
-function slurm_job_modify ( job_desc, job_rec, part_list )
+function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid )
setmetatable (job_desc, job_req_meta)
setmetatable (job_rec, job_rec_meta)
local part_rec = _build_part_table (part_list)
@@ -74,8 +74,8 @@
-- *** YOUR LOGIC GOES BELOW ***
if job_desc.comment == nil then
local comment = "***TEST_COMMENT***"
- log_info("slurm_job_modify: for job %u, setting default comment value: %s",
- job_rec.job_id, comment)
+ log_info("slurm_job_modify: for job %u from uid %d, setting default comment value: %s",
+ job_rec.job_id, modify_uid, comment)
job_desc.comment = comment
end
diff --git a/contribs/pam/Makefile.in b/contribs/pam/Makefile.in
index d4a36c3..e45f16e 100644
--- a/contribs/pam/Makefile.in
+++ b/contribs/pam/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -106,6 +106,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
@HAVE_PAM_TRUE@pam_slurm_la_DEPENDENCIES = \
@@ -215,6 +221,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -251,6 +258,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -419,7 +427,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-pam_slurm.la: $(pam_slurm_la_OBJECTS) $(pam_slurm_la_DEPENDENCIES)
+pam_slurm.la: $(pam_slurm_la_OBJECTS) $(pam_slurm_la_DEPENDENCIES) $(EXTRA_pam_slurm_la_DEPENDENCIES)
$(pam_slurm_la_LINK) $(am_pam_slurm_la_rpath) $(pam_slurm_la_OBJECTS) $(pam_slurm_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -556,10 +564,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/contribs/pam/README b/contribs/pam/README
index 825b585..62b2668 100644
--- a/contribs/pam/README
+++ b/contribs/pam/README
@@ -34,6 +34,27 @@
rsh_kludge - prevent truncation of first char from rsh error msg
rlogin_kludge - prevent "staircase-effect" following rlogin error msg
+Notes:
+ This module will not work on systems where the hostname returned by the
+ gethostname() differs from SLURM node name. This includes front-end
+ configurations (IBM BlueGene or Cray systems) or systems configured
+ in SLURM using the NodeHostName parameter.
+ rsh_kludge - The rsh service under RH71 (rsh-0.17-2.5) truncates the first
+ character of this message. The rsh client sends 3 NUL-terminated ASCII
+ strings: client-user-name, server-user-name, and command string. The
+ server then validates the user. If the user is valid, it responds with a
+ 1-byte zero; otherwise, it responds with a 1-byte one followed by an ASCII
+ error message and a newline. RH's server is using the default PAM
+ conversation function which doesn't prepend the message with a
+ single-byte error code. As a result, the client receives a string,
+ interprets the first byte as a non-zero status, and treats the
+ remaining string as an error message. The rsh_kludge prepends a
+ newline which will be interpreted by the rsh client as an error status.
+ rlogin_kludge - The rlogin service under RH71 (rsh-0.17-2.5) does not perform
+ a carriage-return after the PAM error message is displayed which results
+ in the "staircase-effect" of the next message. The rlogin_kludge appends
+ a carriage-return to prevent this.
+
Examples / Suggested Usage:
Use of this module is recommended on any compute node where you want to
limit access to just those users who are currently scheduled to run jobs.
diff --git a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c
index 62b7615..9381d75 100644
--- a/contribs/pam/pam_slurm.c
+++ b/contribs/pam/pam_slurm.c
@@ -49,6 +49,8 @@
#include <dlfcn.h>
#include "slurm/slurm.h"
+#include "src/common/xmalloc.h"
+#include "src/common/read_config.h"
/* Define the externally visible functions in this file.
*/
@@ -83,7 +85,7 @@
*
*/
static void * slurm_h = NULL;
-static int debug = 0;
+static int pam_debug = 0;
static void _log_msg(int level, const char *format, ...);
static void _parse_args(struct _options *opts, int argc, const char **argv);
@@ -94,7 +96,7 @@
#define DBG(msg,args...) \
do { \
- if (debug) \
+ if (pam_debug) \
_log_msg(LOG_INFO, msg, ##args); \
} while (0);
@@ -211,7 +213,7 @@
*/
for (i=0; i<argc; i++) {
if (!strcmp(argv[i], "debug"))
- opts->enable_debug = debug = 1;
+ opts->enable_debug = pam_debug = 1;
else if (!strcmp(argv[i], "no_sys_info"))
opts->disable_sys_info = 1;
else if (!strcmp(argv[i], "no_warn"))
@@ -228,7 +230,7 @@
/*
* Return 1 if 'hostname' is a member of 'str', a SLURM-style host list as
- * returned by SLURM datatbase queries, else 0. The 'str' argument is
+ * returned by SLURM database queries, else 0. The 'str' argument is
* truncated to the base prefix as a side-effect.
*/
static int
@@ -251,6 +253,40 @@
return 1;
}
+/* _gethostname_short - equivalent to gethostname, but return only the first
+ * component of the fully qualified name
+ * (e.g. "linux123.foo.bar" becomes "linux123")
+ *
+ * Copied from src/common/read_config.c because it is not exported
+ * through libslurm.
+ *
+ * OUT name
+ */
+static int
+_gethostname_short (char *name, size_t len)
+{
+ int error_code, name_len;
+ char *dot_ptr, path_name[1024];
+
+ error_code = gethostname(path_name, sizeof(path_name));
+ if (error_code)
+ return error_code;
+
+ dot_ptr = strchr (path_name, '.');
+ if (dot_ptr == NULL)
+ dot_ptr = path_name + strlen(path_name);
+ else
+ dot_ptr[0] = '\0';
+
+ name_len = (dot_ptr - path_name);
+ if (name_len > len)
+ return ENAMETOOLONG;
+
+ strcpy(name, path_name);
+ return 0;
+}
+
+
/*
* Query the SLURM database to find out if 'uid' has been allocated
* this node. If so, return 1 indicating that 'uid' is authorized to
@@ -260,17 +296,30 @@
_slurm_match_allocation(uid_t uid)
{
int authorized = 0, i;
- char hostname[MAXHOSTNAMELEN], *p;
+ char hostname[MAXHOSTNAMELEN];
+ char *nodename = NULL;
job_info_msg_t * msg;
- if (gethostname(hostname, sizeof(hostname)) < 0) {
+ if (_gethostname_short(hostname, sizeof(hostname)) < 0) {
_log_msg(LOG_ERR, "gethostname: %m");
return 0;
}
- if ((p = strchr(hostname, '.')))
- *p = '\0';
- DBG ("does uid %ld have \"%s\" allocated", uid, hostname);
+ if (!(nodename = slurm_conf_get_nodename(hostname))) {
+ if (!(nodename = slurm_conf_get_aliased_nodename())) {
+ /* if no match, try localhost (Should only be
+ * valid in a test environment) */
+ if (!(nodename =
+ slurm_conf_get_nodename("localhost"))) {
+ _log_msg(LOG_ERR,
+ "slurm_conf_get_aliased_nodename: "
+ "no hostname found");
+ return 0;
+ }
+ }
+ }
+
+ DBG ("does uid %ld have \"%s\" allocated?", uid, nodename);
if (slurm_load_jobs((time_t) 0, &msg, SHOW_ALL) < 0) {
_log_msg(LOG_ERR, "slurm_load_jobs: %s",
@@ -287,15 +336,15 @@
DBG ("jobid %ld: nodes=\"%s\"", j->job_id, j->nodes);
- if (_hostrange_member(hostname, j->nodes) ) {
+ if (_hostrange_member(nodename, j->nodes) ) {
DBG ("user %ld allocated node %s in job %ld",
- uid, hostname, j->job_id);
+ uid, nodename, j->job_id);
authorized = 1;
break;
}
}
}
-
+ xfree(nodename);
slurm_free_job_info_msg (msg);
return authorized;
@@ -368,7 +417,7 @@
/* First try to use the same libslurm version ("libslurm.so.24.0.0"),
* Second try to match the major version number ("libslurm.so.24"),
* Otherwise use "libslurm.so" */
- if (snprintf(libslurmname, sizeof(libslurmname),
+ if (snprintf(libslurmname, sizeof(libslurmname),
"libslurm.so.%d.%d.%d", SLURM_API_CURRENT,
SLURM_API_REVISION, SLURM_API_AGE) >=
sizeof(libslurmname) ) {
diff --git a/contribs/perlapi/Makefile.in b/contribs/perlapi/Makefile.in
index 2301164..7bb0afd 100644
--- a/contribs/perlapi/Makefile.in
+++ b/contribs/perlapi/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -201,6 +201,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -237,6 +238,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -573,10 +575,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/contribs/perlapi/libslurm/Makefile.in b/contribs/perlapi/libslurm/Makefile.in
index f348bd6..53c0cba 100644
--- a/contribs/perlapi/libslurm/Makefile.in
+++ b/contribs/perlapi/libslurm/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -161,6 +161,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -197,6 +198,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -414,10 +416,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
maintainer-clean-generic:
diff --git a/contribs/perlapi/libslurm/perl/block.c b/contribs/perlapi/libslurm/perl/block.c
index 5d4f9df..0feb688 100644
--- a/contribs/perlapi/libslurm/perl/block.c
+++ b/contribs/perlapi/libslurm/perl/block.c
@@ -55,7 +55,6 @@
}
hv_store_sv(hv, "ionode_inx", newRV_noinc((SV*)av));
}
- STORE_FIELD(hv, block_info, job_running, uint32_t);
if(block_info->linuximage)
STORE_FIELD(hv, block_info, linuximage, charp);
if(block_info->mloaderimage)
@@ -63,9 +62,8 @@
if(block_info->mp_str)
STORE_FIELD(hv, block_info, mp_str, charp);
STORE_FIELD(hv, block_info, cnode_cnt, uint32_t);
+ STORE_FIELD(hv, block_info, cnode_err_cnt, uint32_t);
STORE_FIELD(hv, block_info, node_use, uint16_t);
- if (block_info->owner_name)
- STORE_FIELD(hv, block_info, owner_name, charp);
if(block_info->ramdiskimage)
STORE_FIELD(hv, block_info, ramdiskimage, charp);
if(block_info->reason)
@@ -124,13 +122,11 @@
} else {
/* nothing to do */
}
- FETCH_FIELD(hv, block_info, job_running, uint32_t, TRUE);
FETCH_FIELD(hv, block_info, linuximage, charp, FALSE);
FETCH_FIELD(hv, block_info, mloaderimage, charp, FALSE);
FETCH_FIELD(hv, block_info, mp_str, charp, FALSE);
FETCH_FIELD(hv, block_info, cnode_cnt, uint32_t, TRUE);
FETCH_FIELD(hv, block_info, node_use, uint16_t, TRUE);
- FETCH_FIELD(hv, block_info, owner_name, charp, FALSE);
FETCH_FIELD(hv, block_info, ramdiskimage, charp, FALSE);
FETCH_FIELD(hv, block_info, reason, charp, FALSE);
FETCH_FIELD(hv, block_info, state, uint16_t, TRUE);
@@ -252,13 +248,11 @@
} else {
/* nothing to do */
}
- FETCH_FIELD(hv, update_msg, job_running, uint32_t, FALSE);
FETCH_FIELD(hv, update_msg, linuximage, charp, FALSE);
FETCH_FIELD(hv, update_msg, mloaderimage, charp, FALSE);
FETCH_FIELD(hv, update_msg, mp_str, charp, FALSE);
FETCH_FIELD(hv, update_msg, cnode_cnt, uint32_t, FALSE);
FETCH_FIELD(hv, update_msg, node_use, uint16_t, FALSE);
- FETCH_FIELD(hv, update_msg, owner_name, charp, FALSE);
FETCH_FIELD(hv, update_msg, ramdiskimage, charp, FALSE);
FETCH_FIELD(hv, update_msg, reason, charp, FALSE);
FETCH_FIELD(hv, update_msg, state, uint16_t, FALSE);
diff --git a/contribs/perlapi/libslurm/perl/conf.c b/contribs/perlapi/libslurm/perl/conf.c
index d4fe894..d3676c6 100644
--- a/contribs/perlapi/libslurm/perl/conf.c
+++ b/contribs/perlapi/libslurm/perl/conf.c
@@ -149,6 +149,7 @@
STORE_FIELD(hv, conf, propagate_rlimits, charp);
if(conf->propagate_rlimits_except)
STORE_FIELD(hv, conf, propagate_rlimits_except, charp);
+ STORE_FIELD(hv, conf, reconfig_flags, uint16_t);
if(conf->resume_program)
STORE_FIELD(hv, conf, resume_program, charp);
STORE_FIELD(hv, conf, resume_rate, uint16_t);
@@ -340,6 +341,7 @@
FETCH_FIELD(hv, conf, propagate_prio_process, uint16_t, TRUE);
FETCH_FIELD(hv, conf, propagate_rlimits, charp, FALSE);
FETCH_FIELD(hv, conf, propagate_rlimits_except, charp, FALSE);
+ FETCH_FIELD(hv, conf, reconfig_flags, uint16_t, TRUE);
FETCH_FIELD(hv, conf, resume_program, charp, FALSE);
FETCH_FIELD(hv, conf, resume_rate, uint16_t, TRUE);
FETCH_FIELD(hv, conf, resume_timeout, uint16_t, TRUE);
diff --git a/contribs/perlapi/libslurm/perl/node.c b/contribs/perlapi/libslurm/perl/node.c
index 18d2959..245dac2 100644
--- a/contribs/perlapi/libslurm/perl/node.c
+++ b/contribs/perlapi/libslurm/perl/node.c
@@ -135,6 +135,9 @@
/* record_count implied in node_array */
av = newAV();
for(i = 0; i < node_info_msg->record_count; i ++) {
+ if (!node_info_msg->node_array[i].name)
+ continue;
+
hv_info =newHV();
if (node_info_to_hv(node_info_msg->node_array + i,
node_info_msg->node_scaling, hv_info) < 0) {
@@ -196,6 +199,8 @@
{
slurm_init_update_node_msg(update_msg);
+ FETCH_FIELD(hv, update_msg, node_addr, charp, FALSE);
+ FETCH_FIELD(hv, update_msg, node_hostname, charp, FALSE);
FETCH_FIELD(hv, update_msg, node_names, charp, TRUE);
FETCH_FIELD(hv, update_msg, node_state, uint16_t, FALSE);
FETCH_FIELD(hv, update_msg, reason, charp, FALSE);
diff --git a/contribs/perlapi/libslurm/perl/reservation.c b/contribs/perlapi/libslurm/perl/reservation.c
index d922b8d..0d9a243 100644
--- a/contribs/perlapi/libslurm/perl/reservation.c
+++ b/contribs/perlapi/libslurm/perl/reservation.c
@@ -174,7 +174,7 @@
FETCH_FIELD(hv, resv_msg, flags, uint16_t, FALSE);
FETCH_FIELD(hv, resv_msg, licenses, charp, FALSE);
FETCH_FIELD(hv, resv_msg, name, charp, FALSE);
- FETCH_FIELD(hv, resv_msg, node_cnt, uint32_t, FALSE);
+ FETCH_PTR_FIELD(hv, resv_msg, node_cnt, "SLURM::uint32_t", FALSE);
FETCH_FIELD(hv, resv_msg, node_list, charp, FALSE);
FETCH_FIELD(hv, resv_msg, partition, charp, FALSE);
FETCH_FIELD(hv, resv_msg, start_time, time_t, FALSE);
diff --git a/contribs/perlapi/libslurmdb/Makefile.in b/contribs/perlapi/libslurmdb/Makefile.in
index 06c5a9e..19bfb64 100644
--- a/contribs/perlapi/libslurmdb/Makefile.in
+++ b/contribs/perlapi/libslurmdb/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -161,6 +161,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -197,6 +198,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -394,10 +396,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
maintainer-clean-generic:
diff --git a/contribs/perlapi/libslurmdb/perl/cluster.c b/contribs/perlapi/libslurmdb/perl/cluster.c
index f94e949..e84fcec 100644
--- a/contribs/perlapi/libslurmdb/perl/cluster.c
+++ b/contribs/perlapi/libslurmdb/perl/cluster.c
@@ -74,6 +74,7 @@
FETCH_LIST_FIELD(hv, assoc_cond, grp_cpu_run_mins_list);
FETCH_LIST_FIELD(hv, assoc_cond, grp_cpus_list);
FETCH_LIST_FIELD(hv, assoc_cond, grp_jobs_list);
+ FETCH_LIST_FIELD(hv, assoc_cond, grp_mem_list);
FETCH_LIST_FIELD(hv, assoc_cond, grp_nodes_list);
FETCH_LIST_FIELD(hv, assoc_cond, grp_submit_jobs_list);
FETCH_LIST_FIELD(hv, assoc_cond, grp_wall_list);
@@ -167,6 +168,7 @@
FETCH_LIST_FIELD(hv, job_cond, associd_list);
FETCH_LIST_FIELD(hv, job_cond, cluster_list);
FETCH_LIST_FIELD(hv, job_cond, groupid_list);
+ FETCH_LIST_FIELD(hv, job_cond, jobname_list);
FETCH_LIST_FIELD(hv, job_cond, partition_list);
FETCH_LIST_FIELD(hv, job_cond, qos_list);
FETCH_LIST_FIELD(hv, job_cond, resv_list);
diff --git a/contribs/phpext/Makefile.in b/contribs/phpext/Makefile.in
index fce7d41..e96c353 100644
--- a/contribs/phpext/Makefile.in
+++ b/contribs/phpext/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -161,6 +161,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -197,6 +198,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -379,10 +381,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
maintainer-clean-generic:
diff --git a/contribs/sjobexit/Makefile.in b/contribs/sjobexit/Makefile.in
index effe3cb..3250f3f 100644
--- a/contribs/sjobexit/Makefile.in
+++ b/contribs/sjobexit/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -106,6 +106,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(bindir)"
SCRIPTS = $(bin_SCRIPTS)
SOURCES =
@@ -188,6 +194,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -224,6 +231,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -407,10 +415,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/contribs/slurm_completion_help/README.md b/contribs/slurm_completion_help/README.md
new file mode 100644
index 0000000..2a02f57
--- /dev/null
+++ b/contribs/slurm_completion_help/README.md
@@ -0,0 +1,45 @@
+slurm-helper
+============
+
+Bunch of helper files for the Slurm resource manager
+
+Vim syntax file
+---------------
+
+The Vim syntax file renders the Slurm batch submission scripts easier to read and to spot errors in the submission options.
+
+As submission scripts are indeed shell scripts, and all Slurm options are actually Shell comments, it can be difficult to spot errors in the options.
+
+This syntax file allows vim to understand the Slurm option and highlight them accordingly. Whenever possible, the syntax rules check the validity of the options and put in a special color what is not recognized as a valid option, or valid parameters values.
+
+__Installation__
+
+Under Linux or MacOS, simply copy the file in the directory
+
+ .vim/after/syntax/sh/
+
+or whatever shell other than ``sh`` you prefer.
+
+The syntax file is then read and applied on a Shell script after the usual syntax file has been processed.
+
+__Known issues__
+
+* Some regex needed to validate options or parameter values are not exactly correct, but should work in most cases.
+* Any new option unknown to the syntax file will be spotted as an error.
+
+Bash completion
+---------------
+
+The Bash completion script offers <TAB> completion for Slurm commands.
+
+At present the following Slurm commands are considered
+* scontrol
+* sreport
+
+__Installation__
+
+Simply source the script in your .bashrc or .profile
+
+__Known issues__
+
+Keyword arguments are not auto-completed beyond the first one.
diff --git a/contribs/slurm_completion_help/slurm.vim b/contribs/slurm_completion_help/slurm.vim
new file mode 100644
index 0000000..ca0b111
--- /dev/null
+++ b/contribs/slurm_completion_help/slurm.vim
@@ -0,0 +1,176 @@
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+"
+" Vim syntax file for completion for Slurm
+"
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+" Copyright (C) 2012 Damien François. <damien.francois@uclouvain.Be>
+" Written by Damien François. <damien.francois@uclouvain.Be>.
+"
+" This file is part of SLURM, a resource management program.
+" For details, see <http://www.schedmd.com/slurmdocs/>.
+" Please also read the included file: DISCLAIMER.
+"
+" SLURM is free software; you can redistribute it and/or modify it under
+" the terms of the GNU General Public License as published by the Free
+" Software Foundation; either version 2 of the License, or (at your option)
+" any later version.
+"
+" In addition, as a special exception, the copyright holders give permission
+" to link the code of portions of this program with the OpenSSL library under
+" certain conditions as described in each individual source file, and
+" distribute linked combinations including the two. You must obey the GNU
+" General Public License in all respects for all of the code used other than
+" OpenSSL. If you modify file(s) with this exception, you may extend this
+" exception to your version of the file(s), but you are not obligated to do
+" so. If you do not wish to do so, delete this exception statement from your
+" version. If you delete this exception statement from all source files in
+" the program, then also delete it here.
+"
+" SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+" WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+" FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+" details.
+"
+" You should have received a copy of the GNU General Public License along
+" with SLURM; if not, write to the Free Software Foundation, Inc.,
+" 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+"
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+" handling /bin/sh with is_kornshell/is_sh {{{1
+" b:is_sh is set when "#! /bin/sh" is found;
+" However, it often is just a masquerade by bash (typically Linux)
+" or kornshell (typically workstations with Posix "sh").
+" So, when the user sets "is_bash" or "is_kornshell",
+" a b:is_sh is converted into b:is_bash/b:is_kornshell,
+" respectively.
+if !exists("b:is_kornshell") && !exists("b:is_bash")
+ if exists("g:is_posix") && !exists("g:is_kornshell")
+ let g:is_kornshell= g:is_posix
+ endif
+ if exists("g:is_kornshell")
+ let b:is_kornshell= 1
+ if exists("b:is_sh")
+ unlet b:is_sh
+ endif
+ elseif exists("g:is_bash")
+ let b:is_bash= 1
+ if exists("b:is_sh")
+ unlet b:is_sh
+ endif
+ else
+ let b:is_sh= 1
+ endif
+endif
+
+" Slurm: {{{1
+" ===================
+" Slurm SBATCH comments are one liners beginning with #SBATCH and containing
+" the keyword (i.e.SBATCH), one option (here only options starting with -- are
+" considered), and one optional value.
+syn region shSlurmComment start="^#SBATCH" end="\n" oneline contains=shSlurmKeyword,shSlurmOption,shSlurmValue
+" all shSlurmString are suspect; they probably could be narrowed down to more
+" specific regular expressions. Typical example is --mail-type or --begin
+syn match shSlurmKeyword contained '#SBATCH\s*'
+syn match shSlurmOption contained '--account=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--acctg-freq=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--extra-node-info=' nextgroup=shSlurmNodeInfo
+syn match shSlurmOption contained '--socket-per-node=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--cores-per-socket=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--threads-per-core=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--begin=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--checkpoint=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--checkpoint-dir=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--comment=' nextgroup=shSlurmIdentifier
+syn match shSlurmOption contained '--constraint=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--contiguous'
+syn match shSlurmOption contained '--cpu-bind==' nextgroup=shSlurmString
+syn match shSlurmOption contained '--cpus-per-task=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--dependency=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--workdir=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--error=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--exclusive'
+syn match shSlurmOption contained '--nodefile=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--get-user-env'
+syn match shSlurmOption contained '--get-user-env=' nextgroup=shSlurmEnv
+syn match shSlurmOption contained '--gid=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--hint=' nextgroup=shSlurmHint
+syn match shSlurmOption contained '--immediate' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--input=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--job-name=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--job-id=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--no-kill'
+syn match shSlurmOption contained '--licences=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--distribution=' nextgroup=shSlurmDist
+syn match shSlurmOption contained '--mail-user=' nextgroup=shSlurmEmail
+syn match shSlurmOption contained '--mail-type=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--mem=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--mem-per-cpu=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--mem-bind=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--mincores=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--mincpus=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--minsockets=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--minthreads=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--nodes=' nextgroup=shSlurmInterval
+syn match shSlurmOption contained '--ntasks=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--network=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--nice'
+syn match shSlurmOption contained '--nice=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--no-requeue'
+syn match shSlurmOption contained '--ntasks-per-core=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--ntasks-per-socket=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--ntasls-per-node=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--overcommit'
+syn match shSlurmOption contained '--output=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--open-mode=' nextgroup=shSlurmMode
+syn match shSlurmOption contained '--partition=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--propagate'
+syn match shSlurmOption contained '--propagate=' nextgroup=shSlurmPropag
+syn match shSlurmOption contained '--quiet'
+syn match shSlurmOption contained '--requeue'
+syn match shSlurmOption contained '--reservation=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--share'
+syn match shSlurmOption contained '--signal=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--time=' nextgroup=shSlurmDuration
+syn match shSlurmOption contained '--tasks-per-node=' nextgroup=shSlurmNumber
+syn match shSlurmOption contained '--tmp=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--uid=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--nodelist=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--wckey=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--wrap=' nextgroup=shSlurmString
+syn match shSlurmOption contained '--exclude=' nextgroup=shSlurmString
+syn region shSlurmValue start="=" end="$" contains=shSlurmNoshSlurmEnvdeInfo,shSlurmString,shSlurmMailType,shSlurmIdentifier,shSlurmEnv,shSlurmHint,shSlurmMode,shSlurmPropag,shSlurmInterval,shSlurmDist,shSlurmEmail
+syn match shSlurmNumber contained '\d\d*'
+syn match shSlurmDuration contained '\d\d*\(:\d\d\)\{,2}'
+syn match shSlurmNodeInfo contained '\d\d*\(:\d\d*\)\{,2}'
+syn match shSlurmDuration contained '\d\d*-\d\=\d\(:\d\d\)\{,2}'
+syn match shSlurmInterval contained '\d\d*\(-\d*\)\='
+syn match shSlurmString contained '.*'
+syn match shSlurmEnv contained '\d*L\=S\='
+syn keyword shSlurmHint contained compute_bound memory_bound nomultithread multithread
+syn keyword shSlurmMode contained append truncate
+syn keyword shSlurmPropag contained ALL AS CORE CPU DATA FSIZE MEMLOCK NOFILE CPROC RSS STACK
+syn keyword shSlurmDist contained block cyclic arbitrary
+syn match shSlurmDist contained 'plane\(=.*\)\='
+syn match shSlurmEmail contained '[-a-zA-Z0-9.+]*@[-a-zA-Z0-9.+]*'
+
+"Anything that is not recognized is marked as error
+hi def link shSlurmComment Error
+"The #SBATCH keyword
+hi def link shSlurmKeyword Function
+"The option
+hi def link shSlurmOption Operator
+"The values
+hi def link shSlurmDuration Special
+hi def link shSlurmString Special
+hi def link shSlurmMailType Special
+hi def link shSlurmNumber Special
+hi def link shSlurmSep Special
+hi def link shSlurmNodeInfo Special
+hi def link shSlurmEnv Special
+hi def link shSlurmHint Special
+hi def link shSlurmMode Special
+hi def link shSlurmPropag Special
+hi def link shSlurmInterval Special
+hi def link shSlurmDist Special
+hi def link shSlurmEmail Special
diff --git a/contribs/slurm_completion_help/slurm_completion.sh b/contribs/slurm_completion_help/slurm_completion.sh
new file mode 100644
index 0000000..d51e1cb
--- /dev/null
+++ b/contribs/slurm_completion_help/slurm_completion.sh
@@ -0,0 +1,185 @@
+###############################################################################
+#
+# Bash completion for Slurm
+#
+###############################################################################
+# Copyright (C) 2012 Damien François. <damien.francois@uclouvain.Be>
+# Written by Damien François. <damien.francois@uclouvain.Be>.
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# In addition, as a special exception, the copyright holders give permission
+# to link the code of portions of this program with the OpenSSL library under
+# certain conditions as described in each individual source file, and
+# distribute linked combinations including the two. You must obey the GNU
+# General Public License in all respects for all of the code used other than
+# OpenSSL. If you modify file(s) with this exception, you may extend this
+# exception to your version of the file(s), but you are not obligated to do
+# so. If you do not wish to do so, delete this exception statement from your
+# version. If you delete this exception statement from all source files in
+# the program, then also delete it here.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+###############################################################################
+
+_scontrol()
+{
+ local cur=${COMP_WORDS[COMP_CWORD]}
+ local prev=${COMP_WORDS[COMP_CWORD-1]}
+
+ local subopts=""
+
+ case "$prev" in
+ node)
+ local pprev=${COMP_WORDS[COMP_CWORD-2]}
+ if [[ "$pprev" == "show" ]]; then
+ subopts=$( scontrol show nodes | grep NodeName | cut -c 10- | cut -f 1 -d' ')
+ elif [[ "$pprev" == "update" ]]; then
+ subopts="NodeName= Features= Gres= Reason= State= Weight="
+ else
+ subopts=""
+ fi
+ ;;
+
+ job)
+ local pprev=${COMP_WORDS[COMP_CWORD-2]}
+ if [[ "$pprev" == "show" ]]; then
+ subopts=$( scontrol -o show jobs | cut -d' ' -f 1 | cut -d'=' -f 2 )
+ elif [[ "$pprev" == "update" ]]; then
+ subopts="Account= Conn-Type= Contiguous= Dependency= EligibleTime="
+ subopts="$subopts ExcNodeList= Features= Geometry= Gres= JobId="
+ subopts="$subopts JobId= MinCpusNode= MinMemoryNode= MinTmpDiskNodea"
+ subopts="$subopts Name= Nice= NodeList= NumCPUs= NumNodes= NumTasks="
+ subopts="$subopts Partition= Priority= QOS= ReqCores= ReqNodelist="
+ subopts="$subopts ReqSockets= ReqThreads= Requeue= ReservationName="
+ subopts="$subopts Rotate= Shared= StartTime= TimeLimit= WCKey="
+ else
+ subopts=""
+ fi
+ ;;
+ show)
+ subopts="config daemons job node partition reservation slurmd step topology"
+ subopts="$subopts hostlist hostnames"
+ ;;
+ shutdown)
+ subopts="slurmctld controller"
+ ;;
+ setdebug)
+ subopts="quiet fatal error info verbose debug debug2 debug3 debug4 debug5"
+ ;;
+ notify | uhold | suspend | release | requeue | resume | hold )
+ subopts=$( scontrol -o show jobs | cut -d' ' -f 1 | cut -d'=' -f 2 )
+ ;;
+ step)
+ subopts="StepId= TimeLimit="
+ ;;
+ delete)
+ subopts="PartitionName= Reservation="
+ ;;
+ reservation)
+ subopts="Reservation= Accounts= Licences= NodeCnt= Nodes= StartTime="
+ subopts="$subopts EndTime= Duration= PartitionName=Flags= Features= Users="
+ ;;
+ partition)
+ subopts="AllowGroups= AllocNodes= Alternate= Default= DefaultTime="
+ subopts="$subopts DisableRootJobs= Hidden= MaxNodes= MatxTime= MinNodes= Nodes="
+ subopts="$subopts PartitionName= PreemtpMode= Piority= RootOnly= Shared= State="
+ ;;
+ create)
+ subopts="partition reservation"
+ ;;
+ update)
+ subopts="job step node partition reservation"
+ ;;
+ disable | enable | able | create | vacate | error | restart)
+ subopts=$( scontrol -o show jobs | cut -d' ' -f 1 | cut -d'=' -f 2 )
+ ;;
+ checkpoint)
+ subopts="disable enable able create vacate error restart"
+ ;;
+ scontrol)
+ if [[ "$cur" == - ]]; then
+ subopts="-a -d -h -M -o -Q -v -V"
+ elif [[ "$cur" == -- ]]; then
+ subopts="--all --details --help --hide --cluster"
+ subopts="$subopts --oneliner --quiet --verbose --version"
+ else
+ subopts="abort checkpoint create completing delete hold notify"
+ subopts="$subopts pidinfo listpids ping reconfigure release requeue"
+ subopts="$subopts resume setdebug show shutdown suspend takeover"
+ subopts="$subopts uhold update version"
+ fi
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+ COMPREPLY=( $(compgen -W "${subopts}" -- $cur) )
+}
+complete -F _scontrol scontrol
+
+_sreport()
+{
+ local cur=${COMP_WORDS[COMP_CWORD]}
+ local prev=${COMP_WORDS[COMP_CWORD-1]}
+
+ local subopts=""
+ local opts4all="All_Clusters Clusters= End= Format= Start="
+
+ case "$prev" in
+ user)
+ subopts="TopUsage"
+ ;;
+ TopUsage)
+ subopts="$opts4all Accounts= Group TopCount= Users="
+ ;;
+ reservation)
+ subopts="Utilization"
+ ;;
+ Utilization)
+ subopts="$opts4all Names= Nodes="
+ ;;
+ job)
+ subopts="SizesByAccount SizesByAccountAndWckey SizesByWckey"
+ ;;
+ SizesByAccount|SizesByAccountAndWckey|SizesByWckey)
+ subopts="$opts4all Accounts= FlatView GID= Grouping= Jobs= Nodes= OPartitions= PrintJobCount Users= Wckeys="
+ ;;
+ cluster)
+ subopts="AccountUtilizationByUser UserUtilizationByAccount UserUtilizationByWCKey Utilization WCKeyUtilizationByUser"
+ ;;
+ AccountUtilizationByUser|UserUtilizationByAccount|UserUtilizationByWCKey|Utilization|WCKeyUtilizationByUser)
+ subopts="$opts4all Accounts= Tree Users= Wckeys="
+ ;;
+ sreport)
+ if [[ "$cur" == - ]]; then
+ subopts="-a -n -h -p -P -t -v -V"
+ elif [[ "$cur" == -- ]]; then
+ subopts="--all_clusters --help --noheader --parsable"
+ subopts="$subopts --parsable2--quiet --verbose --version"
+ else
+ subopts="cluster job user reservation"
+ fi
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+ COMPREPLY=( $(compgen -W "${subopts}" -- $cur) )
+}
+complete -F _sreport sreport
diff --git a/contribs/slurmdb-direct/Makefile.in b/contribs/slurmdb-direct/Makefile.in
index d4ab579c..3d498c2 100644
--- a/contribs/slurmdb-direct/Makefile.in
+++ b/contribs/slurmdb-direct/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -106,6 +106,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(sbindir)"
SCRIPTS = $(sbin_SCRIPTS)
SOURCES =
@@ -188,6 +194,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -224,6 +231,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -408,10 +416,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/contribs/torque/Makefile.in b/contribs/torque/Makefile.in
index ea6c9b2..443fe42 100644
--- a/contribs/torque/Makefile.in
+++ b/contribs/torque/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -106,6 +106,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(bindir)"
SCRIPTS = $(bin_SCRIPTS)
SOURCES =
@@ -188,6 +194,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -224,6 +231,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -407,10 +415,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/doc/Makefile.in b/doc/Makefile.in
index 1227a79..52852c9 100644
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -201,6 +201,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -237,6 +238,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -583,10 +585,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/doc/html/Makefile.am b/doc/html/Makefile.am
index 1cbc421..bf4754c 100644
--- a/doc/html/Makefile.am
+++ b/doc/html/Makefile.am
@@ -1,51 +1,14 @@
htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html
-if HAVE_MAN2HTML
-man_html = \
- ../man/man1/sacct.html \
- ../man/man1/sacctmgr.html \
- ../man/man1/salloc.html \
- ../man/man1/sattach.html \
- ../man/man1/sbatch.html \
- ../man/man1/sbcast.html \
- ../man/man1/scancel.html \
- ../man/man1/scontrol.html \
- ../man/man1/sinfo.html \
- ../man/man1/smap.html \
- ../man/man1/sprio.html \
- ../man/man1/squeue.html \
- ../man/man1/sreport.html \
- ../man/man1/srun.html \
- ../man/man1/srun_cr.html \
- ../man/man1/sshare.html \
- ../man/man1/sstat.html \
- ../man/man1/strigger.html \
- ../man/man1/sview.html \
- ../man/man5/bluegene.conf.html \
- ../man/man5/cgroup.conf.html \
- ../man/man5/gres.conf.html \
- ../man/man5/slurm.conf.html \
- ../man/man5/slurmdbd.conf.html \
- ../man/man5/topology.conf.html \
- ../man/man5/wiki.conf.html \
- ../man/man8/slurmctld.html \
- ../man/man8/slurmd.html \
- ../man/man8/slurmdbd.html \
- ../man/man8/slurmstepd.html \
- ../man/man8/spank.html
-else
- man_html =
-endif
-
generated_html = \
- ${man_html} \
accounting.html \
accounting_storageplugins.html \
api.html \
authplugins.html \
big_sys.html \
bluegene.html \
+ cgroups.html \
checkpoint_blcr.html \
checkpoint_plugins.html \
cons_res.html \
@@ -57,6 +20,7 @@
dist_plane.html \
documentation.html \
download.html \
+ elastic_computing.html \
faq.html \
gang_scheduling.html \
gres.html \
@@ -116,8 +80,10 @@
${generated_html} \
allocation_pies.gif \
arch.gif \
+ bull.jpg \
coding_style.pdf \
configurator.html \
+ configurator.easy.html \
entities.gif \
example_usage.gif \
linuxstyles.css \
@@ -131,6 +97,7 @@
plane_ex6.gif \
plane_ex7.gif \
slurm_banner.gif \
+ slurm_banner_schedmd.png \
slurm_design.pdf \
slurmstyles.css \
sponsors.gif \
@@ -146,12 +113,3 @@
.shtml.html:
`dirname $<`/shtml2html.py $<
-
-if HAVE_MAN2HTML
-.1.html:
- `dirname $<`/../man2html.py $(srcdir)/header.txt $(srcdir)/footer.txt $<
-.5.html:
- `dirname $<`/../man2html.py $(srcdir)/header.txt $(srcdir)/footer.txt $<
-.8.html:
- `dirname $<`/../man2html.py $(srcdir)/header.txt $(srcdir)/footer.txt $<
-endif
diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in
index 755a43a..7843ff6 100644
--- a/doc/html/Makefile.in
+++ b/doc/html/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -37,6 +37,7 @@
target_triplet = @target@
subdir = doc/html
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+ $(srcdir)/configurator.easy.html.in \
$(srcdir)/configurator.html.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
@@ -81,7 +82,7 @@
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
-CONFIG_CLEAN_FILES = configurator.html
+CONFIG_CLEAN_FILES = configurator.html configurator.easy.html
CONFIG_CLEAN_VPATH_FILES =
SOURCES =
DIST_SOURCES =
@@ -106,6 +107,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(htmldir)"
DATA = $(html_DATA)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -186,6 +193,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -222,6 +230,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -309,47 +318,14 @@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-@HAVE_MAN2HTML_TRUE@man_html = \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sacct.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sacctmgr.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/salloc.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sattach.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sbatch.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sbcast.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/scancel.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/scontrol.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sinfo.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/smap.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sprio.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/squeue.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sreport.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/srun.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/srun_cr.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sshare.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sstat.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/strigger.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man1/sview.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man5/bluegene.conf.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man5/cgroup.conf.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man5/gres.conf.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man5/slurm.conf.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man5/slurmdbd.conf.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man5/topology.conf.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man5/wiki.conf.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man8/slurmctld.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man8/slurmd.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man8/slurmdbd.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man8/slurmstepd.html \
-@HAVE_MAN2HTML_TRUE@ ../man/man8/spank.html
-
generated_html = \
- ${man_html} \
accounting.html \
accounting_storageplugins.html \
api.html \
authplugins.html \
big_sys.html \
bluegene.html \
+ cgroups.html \
checkpoint_blcr.html \
checkpoint_plugins.html \
cons_res.html \
@@ -361,6 +337,7 @@
dist_plane.html \
documentation.html \
download.html \
+ elastic_computing.html \
faq.html \
gang_scheduling.html \
gres.html \
@@ -420,8 +397,10 @@
${generated_html} \
allocation_pies.gif \
arch.gif \
+ bull.jpg \
coding_style.pdf \
configurator.html \
+ configurator.easy.html \
entities.gif \
example_usage.gif \
linuxstyles.css \
@@ -435,6 +414,7 @@
plane_ex6.gif \
plane_ex7.gif \
slurm_banner.gif \
+ slurm_banner_schedmd.png \
slurm_design.pdf \
slurmstyles.css \
sponsors.gif \
@@ -448,7 +428,7 @@
all: all-am
.SUFFIXES:
-.SUFFIXES: .html .1 .5 .8 .shtml
+.SUFFIXES: .html .shtml
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
@@ -481,6 +461,8 @@
$(am__aclocal_m4_deps):
configurator.html: $(top_builddir)/config.status $(srcdir)/configurator.html.in
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+configurator.easy.html: $(top_builddir)/config.status $(srcdir)/configurator.easy.html.in
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
mostlyclean-libtool:
-rm -f *.lo
@@ -504,9 +486,7 @@
@$(NORMAL_UNINSTALL)
@list='$(html_DATA)'; test -n "$(htmldir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- test -n "$$files" || exit 0; \
- echo " ( cd '$(DESTDIR)$(htmldir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(htmldir)" && rm -f $$files
+ dir='$(DESTDIR)$(htmldir)'; $(am__uninstall_files_from_dir)
tags: TAGS
TAGS:
@@ -561,10 +541,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
-test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
@@ -657,18 +642,10 @@
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
uninstall uninstall-am uninstall-htmlDATA
-@HAVE_MAN2HTML_FALSE@ man_html =
.shtml.html:
`dirname $<`/shtml2html.py $<
-@HAVE_MAN2HTML_TRUE@.1.html:
-@HAVE_MAN2HTML_TRUE@ `dirname $<`/../man2html.py $(srcdir)/header.txt $(srcdir)/footer.txt $<
-@HAVE_MAN2HTML_TRUE@.5.html:
-@HAVE_MAN2HTML_TRUE@ `dirname $<`/../man2html.py $(srcdir)/header.txt $(srcdir)/footer.txt $<
-@HAVE_MAN2HTML_TRUE@.8.html:
-@HAVE_MAN2HTML_TRUE@ `dirname $<`/../man2html.py $(srcdir)/header.txt $(srcdir)/footer.txt $<
-
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml
index 3391be7..3cfa3e3 100644
--- a/doc/html/accounting.shtml
+++ b/doc/html/accounting.shtml
@@ -139,7 +139,7 @@
and SlurmDBD configuration files (slurm.conf and slurmdbd.conf
respectively, more details are provided below).</p>
-<p?Whether you use any authentication module or not you will need to have
+<p>Whether you use any authentication module or not you will need to have
a way for the SlurmDBD to get uid's for users and/or admin. If using
MUNGE, it is ideal for your users to have the same id on all your
clusters. If this is the case you should have a combination of every clusters
@@ -149,7 +149,7 @@
add anyone you want to be an administrator or operator to the passwd file.
If they plan on running sacctmgr or any of the accounting tools they
should have the same uid, or they will not authentic correctly. An
-LDAP server could also server as a way to gather this information.
+LDAP server could also serve as a way to gather this information.</p>
<h2>Slurm JobComp Configuration</h2>
@@ -193,7 +193,8 @@
supported for job and step accounting only.</b> The infrastructure for
PostgresSQL for use with associations is not yet supported, meaning
sacctmgr will not work correctly. If interested in adding this
-capability for PostgresSQL, please contact us at slurm-dev@lists.llnl.gov.
+capability for PostgresSQL, please contact us at
+<a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a>.
<p>To enable this database support
one only needs to have the development package for the database they
@@ -266,7 +267,8 @@
<li><b>AccountingStoragePass</b>: If using SlurmDBD with a second MUNGE
daemon, store the pathname of the named socket used by MUNGE to provide
-enterprise-wide. Otherwise the default MUNGE daemon will be used.</li>
+enterprise-wide authentication (e.g. /var/run/munge/moab.socket.2). Otherwise
+the default MUNGE daemon will be used.</li>
<li><b>AccountingStoragePort</b>:
The network port that SlurmDBD accepts communication on.</li>
@@ -634,9 +636,18 @@
privileges to this user. Valid options are
<ul>
<li>None</li>
-<li>Operator: can add, modify,and remove users, and add other operators)</li>
-<li>Admin: In addition to operator privileges these users can add, modify,
-and remove accounts and clusters</li>
+<li>Operator: can add, modify, and remove any database object (user,
+account, etc), and add other operators
+<br>On a SlurmDBD served slurmctld these users can<br>
+<ul>
+<li>View information that is blocked to regular users by a PrivateData
+ flag</li>
+<li>Create/Alter/Delete Reservations</li>
+</ul>
+</li>
+<li>Admin: These users have the same level of privileges as an
+operator in the database. They can also alter anything on a served
+slurmctld as if they were the slurm user or root.</li>
</ul>
<li><b>Cluster=</b> Only add to accounts on these clusters (default is all clusters)</li>
@@ -679,6 +690,13 @@
jobs will be allowed to run.
</li>
+<li><b>GrpCPURunMins=</b> Maximum number of CPU minutes all jobs
+ running with this association and its children can run at the same
+ time. This takes into consideration time limit of running jobs. If
+ the limit is reached no new jobs are started until other jobs finish
+ to allow time to free up.
+</li>
+
<li><b>GrpCPUs=</b> The total count of cpus able to be used at any given
time from jobs running from this association and its children. If
this limit is reached new jobs will be queued but only allowed to
@@ -691,6 +709,12 @@
run after previous jobs complete from this group.
</li>
+<li><b>GrpMemory=</b> The total amount of memory (MB) able to be used
+ at any given time from jobs running from this association and its
+ children. If this limit is reached new jobs will be queued but only
+ allowed to run after resources have been relinquished from this group.
+</li>
+
<li><b>GrpNodes=</b> The total count of nodes able to be used at any given
time from jobs running from this association and its children. If
this limit is reached new jobs will be queued but only allowed to
@@ -777,7 +801,7 @@
If an entity has existed for less than 1 day, the entity will be removed
completely. This is meant to clean up after typographic errors.</p>
-<p style="text-align: center;">Last modified 10 June 2010</p>
+<p style="text-align: center;">Last modified 3 February 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/bluegene.shtml b/doc/html/bluegene.shtml
index 4ac6fbf..5c16660 100644
--- a/doc/html/bluegene.shtml
+++ b/doc/html/bluegene.shtml
@@ -13,9 +13,9 @@
<p>BlueGene systems have several unique features making for a few
differences in how SLURM operates there.
BlueGene systems consists of one or more <i>base partitions</i> or
-<i>midplanes</i> connected in a three-dimensional (BlueGene/L and BlueGene/P
-systems) or five-dimensional (BlueGene/Q) torus.
-Each <i>base partition</i> typically includes 512 <i>c-nodes</i> or compute
+<i>midplanes</i> connected in a three-dimensional - XYZ - (BlueGene/L
+and BlueGene/P systems) or four-dimensional - AXYZ - (BlueGene/Q) torus.
+Each <i>midplane</i> typically includes 512 <i>c-nodes</i> or compute
nodes each containing two or more cores;
one core is typically designed primarily for managing communications while the
other cores are used primarily for computations.
@@ -24,15 +24,20 @@
Thus the <i>slurmd</i> daemon(s) executes on one or more of the BlueGene
<i>Front End Nodes</i>.
The <i>slurmd</i> daemons provide (almost) all of the normal SLURM services
-for every <i>base partition</i> on the system. </p>
+for every <i>midplane</i> on the system. </p>
-<p>Internally SLURM treats each <i>base partition</i> as one node with
-a processor count equal to the number of cores on the base partition, which
-keeps the number of entities being managed by SLURM more reasonable.
-Since the current BlueGene software can sub-allocate a <i>base partition</i>
-into smaller blocks, more than one user job can execute on each <i>base
-partition</i> (subject to system administrator configuration). In the case of
-BlueGene/Q systems, more than one user job can also execute in each block.
+<p>Internally SLURM treats each <i>midplane</i> as one node with
+a processor count equal to the number of cores on the midplane, which
+keeps the number of entities being managed by SLURM more
+reasonable.</p>
+
+<p>All BlueGene systems can sub-allocate a <i>midplane</i>
+into smaller blocks, this allows more than one user job to execute on
+each <i>midplane</i>.</p>
+<p>In the case of BlueGene/Q systems, more than one user job can also
+execute in each block (see AllowSubBlockAllocation option in 'man
+bluegene.conf').</p>
+
To effectively utilize this environment, SLURM tools present the user with
the view that each <i>c-node</i> is a separate node, so allocation requests
and status information use <i>c-node</i> counts.
@@ -40,42 +45,83 @@
to represent multiples of 1024 or "m" for multiples of 1,048,576 (1024 x 1024).
For example, "2k" is equivalent to "2048".</p>
+<p>If you are running a system that is smaller than 1 midplane (a
+nodecard/nodeboard or such) you can configure your system up like
+this in the bluegene.conf file. Below is an example for a BlueGene/Q system:</p>
+<pre>
+# Excerpt from bluegene.conf file for BlueGene/Q system
+...
+BasePartitionNodeCnt=512
+NodeCardNodeCnt=32
+SubMidplaneSystem=YES
+LayoutMode=STATIC
+MPs=0000 type=small 32cnblocks=16
+...
+</pre>
+<p>This will create a small block on each nodeboard on the system. If your
+system is different than this, adjust appropriately. The idea is SLURM
+will create the smallest block possible on every possible hardware
+location. The system will then check for missing hardware and remove
+blocks that are invalid. This will get around the problem if you
+have, for instance, the 4th nodeboard populated instead of the 1st.
+</p>
+
<h2>User Tools</h2>
-<p>The normal set of SLURM user tools: sbatch, scancel, sinfo, squeue, and
-scontrol provide all of the expected services except support for job steps,
-which is detailed later.
-Seven new sbatch options are available:
-<i>--geometry</i> (specify job size in each dimension),
-<i>--no-rotate</i> (disable rotation of geometry),
-<i>--conn-type</i> (specify interconnect type between base partitions, mesh or torus).
-<i>--blrts-image</i> (specify alternative blrts image for bluegene --block. Default if not set, BGL only.)
-<i>--cnload-image</i> (specify alternative c-node image for bluegene block. Default if not set, BGP only.)
-<i>--ioload-image</i> (specify alternative io image for bluegene block. Default if not set, BGP only.)
-<i>--linux-image</i> (specify alternative linux image for bluegene block. Default if not set, BGL only.)
-<i>--mloader-image</i> (specify alternative mloader image for bluegene block. Default if not set).
-<i>--ramdisk-image</i> (specify alternative ramdisk image for bluegene block. Default if not set, BGL only.)
-The <i>--nodes</i> option with a minimum and (optionally) maximum node count continues
-to be available.
+<p>The normal set of SLURM user tools: <i>sbatch</i>, <i>scancel</i>,
+<i>sinfo</i>, <i>squeue</i>, and <i>scontrol</i> provide all of the expected
+services except support for job steps, which is detailed later.</p>
+
+<p>Seven job submission options are available exclusively on BlueGene systems:</p>
+<table>
+<tr VALIGN=TOP><td><i>--geometry</i></td><td>Specify job size in each dimension,
+ (i.e. 1x4x4 = 16 nodes)</td></tr>
+<tr VALIGN=TOP><td><i>--no-rotate</i></td><td>Disable rotation of geometry (by default
+ 1x4x4 could be rotated to be 4x1x4)</td></tr>
+<tr VALIGN=TOP><td><i>--conn-type</i></td><td>Specify interconnect
+ type between midplanes, mesh or torus. On BlueGene/Q systems you can
+ specify a different conn-type for each dimension, TTMT would
+ give you Torus in all dimensions except the Y dimension, where
+ it would be Mesh.</td></tr>
+<tr VALIGN=TOP><td><i>--blrts-image</i></td><td>(BlueGene/L systems only)
+ Specify alternative blrts image for bluegene block. Default if not set.</td></tr>
+<tr VALIGN=TOP><td><i>--cnload-image</i></td><td>(BlueGene/P systems only) Specify
+ alternative c-node image for bluegene block. Default if not set.</td></tr>
+<tr VALIGN=TOP><td><i>--ioload-image</i></td><td>(BlueGene/P systems only) Specify
+ alternative io image for bluegene block. Default if not set.</td></tr>
+<tr VALIGN=TOP><td><i>--linux-image</i></td><td>(BlueGene/L systems only)
+ Specify alternative linux image for bluegene block. Default if not set.</td></tr>
+<tr VALIGN=TOP><td><i>--mloader-image</i></td><td>Specify
+ alternative mloader image for bluegene block. Default if not set.</td></tr>
+<tr VALIGN=TOP><td><i>--ramdisk-image</i></td><td>(BlueGene/L or P systems only)
+ Specify alternative ramdisk image for bluegene block. Default if not set.</td></tr>
+</table>
+
+<p>The <i>--nodes</i> option with a minimum and (optionally) maximum node count
+continues to be available.
Note that this is a c-node count.</p>
<h3>Task Launch on BlueGene/Q only</h3>
-<p>Use SLURM's srun command to launch tasks (srun is a wrapper for IBM's
-<i>runjob</i> command.
-SLURM job step information including accounting functions as expected.</p>
+<p>Use SLURM's <i>srun</i> command to launch tasks (<i>srun</i> uses
+an api interface into IBM's <i>runjob</i> command).
+SLURM job step information, including accounting, functions as
+expected. Totalview and other debuggers will also work with srun.
+If SLURM is installed and configured correctly IBM's runjob will not
+work.</p>
<h3>Task Launch on BlueGene/L and BlueGene/P only</h3>
<p>SLURM performs resource allocation for the job, but initiation of tasks is
performed using the <i>mpirun</i> command. SLURM has no concept of a job step
on BlueGene/L or BlueGene/P systems.
-To reiterate: salloc or sbatch are used to create a job allocation, but
-<i>mpirun</i> is used to launch the parallel tasks.
+To reiterate: <u><i>salloc</i> or <i>sbatch</i> are used to create a job allocation, but
+<i>mpirun</i> is used to launch the parallel tasks.</u>
The script that you submit to SLURM can contain multiple invocations of mpirun
as well as any desired commands for pre- and post-processing.
-The mpirun command will get its <i>bgblock</i> information from the
-<i>MPIRUN_PARTITION</i> as set by SLURM. A sample script is shown below.</p>
+The mpirun command will get its <i>block</i> information from the
+<i>MPIRUN_PARTITION</i> environment variable as set by SLURM. A sample script
+is shown below.</p>
<pre>
#!/bin/bash
# pre-processing
@@ -88,33 +134,50 @@
</pre>
<h3><a name="naming">Naming Conventions</a></h3>
-<p>The naming of base partitions includes a numeric suffix representing the its
+<p>The naming of midplanes includes a numeric suffix representing its
coordinates with a zero origin. The suffix contains three digits on BlueGene/L
and BlueGene/P systems, while four digits are required for the BlueGene/Q
-systems. For example, "bgp012" represents the base partition whose coordinate
+systems. For example, "bgp012" represents the midplane whose coordinate
is at X=0, Y=1 and Z=2.
-SLURM uses an abbreviated format for describing base partitions in which the
+SLURM uses an abbreviated format for describing midplanes in which the
end-points of the block enclosed are in square-brackets and separated by an "x".
-For example, "bgp[620x731]" is used to represent the eight base partitions
+For example, "bgp[620x731]" is used to represent the eight midplanes
enclosed in a block with end-points and bgp620 and bgp731 (bgp620, bgp621,
bgp630, bgp631, bgp720, bgp721, bgp730 and bgp731).</p>
-<p><b>IMPORTANT:</b> SLURM higher can support up to 36 elements in each
+<p><b>IMPORTANT:</b> SLURM can support up to 36 elements in each
BlueGene dimension by supporting "A-Z" as valid numbers. SLURM requires the
prefix to be lower case and any letters in the suffix must always be upper
-case. This schema must be used in both the slurm.conf and bluegene.conf
+case. This schema must be used in both the <i>slurm.conf</i> and bluegene.conf
configuration files when specifying midplane/node names (the prefix is
optional). This schema should also be used to specify midplanes or locations
-in configure mode of smap:
+in configure mode of <i>smap</i>:
<br>
valid: bgl[000xC44], bgl000, bglZZZ
<br>
invalid: BGL[000xC44], BglC00, bglb00, Bglzzz
</p>
+<p><b>IMPORTANT:</b> SLURM requires that all systems start with 0 in each
+dimension. So if you have a BlueGene/Q system and only want SLURM to
+run on a portion of it you need to define the entire system and mark
+midplanes down in the slurm.conf file or with scontrol/sview.
+<br>
+As an example of this, with a BGQ system of [0000x2333] where you can
+only use [2000x2333], you could define it in your slurm.conf like this.
+<pre>
+...
+NodeName=bgq[0000x1333] state=down
+NodeName=bgq[2000x2333] state=unknown
+...
+</pre>
+This would mark the nodes not managed by SLURM as down and only create blocks
+on the portion of the machine you want to use.
+</p>
+
<p>In a system configured with <i>small blocks</i> (any block less
-than a full base partition) there will be divisions in the base partition
-notation. On BlueGene/L and BlueGene/P systems, the base partition name may
+than a full midplane), there will be divisions in the midplane
+notation. On BlueGene/L and BlueGene/P systems, the midplane name may
be followed by a square bracket enclosing ID numbers of the IO nodes associated
with the block. For example, if there are 64 psets in a BlueGene/L
configuration, "bgl012[0-15]" represents the first quarter or first 16 IO nodes
@@ -129,10 +192,10 @@
<p>Two topology-aware graphical user interfaces are provided: <i>smap</i> and
<i>sview</i> (<i>sview</i> provides more viewing and configuring options).
See each command's man page for details.
-A sample of smap output is provided below showing the location of five jobs.
-Note the format of the list of base partitions allocated to each job.
-Also note that idle (unassigned) base partitions are indicated by a period.
-Down and drained base partitions (those not available for use) are
+A sample of <i>smap</i> output is provided below showing the location of five jobs.
+Note the format of the list of midplanes allocated to each job.
+Also note that idle (unassigned) midplanes are indicated by a period.
+Down and drained midplanes (those not available for use) are
indicated by a number sign (bg703 in the display below).
The legend is for illustrative purposes only.
The origin (zero in every dimension) is shown at the rear left corner of the bottom plane.
@@ -145,7 +208,7 @@
a a a a b b d d a 12345 batch RMP0 joseph tst1 R 43:12 32k bg[000x333]
a a a a b b c c b 12346 debug RMP1 chris sim3 R 12:34 8k bg[420x533]
a a a a b b c c c 12350 debug RMP2 danny job3 R 0:12 4k bg[622x733]
- d 12356 debug RMP3 dan colu R 18:05 8k bg[600x731]
+ d 12356 debug RMP3 dan colu R 18:05 8k bg[600x731]
a a a a b b d d e 12378 debug RMP4 joseph asx4 R 0:34 2k bg[612x713]
a a a a b b d d
a a a a b b c c
@@ -155,41 +218,41 @@
a a a a . . d d
a a a a . . e e Y
a a a a . . e e |
- |
+ |
a a a a . . d d 0----X
a a a a . . d d /
a a a a . . . . /
a a a a . . . # Z
</pre>
-<p>Note that jobs enter the SLURM state RUNNING as soon as the have been
-allocated a bgblock.
-If the bgblock is in a READY state, the job will begin execution almost
+<p>If the block is in a READY state, the job will begin execution almost
immediately.
Otherwise the execution of the job will not actually begin until the
-bgblock is in a READY state, which can require booting the block and
-a delay of minutes to do so.
-You can identify the bgblock associated with your job using the command
-<i>smap -Dj -c</i> and the state of the bgblock with the command
+block is in a READY state, which can require booting the block and
+a delay of minutes to do so. During this time a job will be in the
+CONFIGURING state.
+You can identify the block associated with your job using the command
+<i>smap -Dj -c</i> and the state of the block with the command
<i>smap -Db -c</i>.
-The time to boot a bgblock is related to its size, but should range from
-from a few minutes to about 15 minutes for a bgblock containing 128
-base partitions.
-Only after the bgblock is READY will your job's output file be created
+The time to boot a block is related to its size, but should range
+from a few minutes to about 15 minutes for a block containing 128
+midplanes (on a BlueGene/L system).
+Only after the block is READY will your job's output file be created
and the script execution begin.
-If the bgblock boot fails, SLURM will attempt to reboot several times
-before draining the associated base partitions and aborting the job.</p>
+If the block boot fails, SLURM will attempt to reboot several times (3)
+before draining the associated midplanes and aborting/requeueing the job.</p>
<p>The job will continue to be in a RUNNING state until the bgjob has
-completed and the bgblock ownership is changed.
+completed and the block ownership is changed.
The time for completing a bgjob has frequently been on the order of
five minutes.
In summary, your job may appear in SLURM as RUNNING for 15 minutes
before the script actually begins to 5 minutes after it completes.
These delays are the result of the BlueGene infrastructure issues and are
-not due to anything in SLURM.</p>
+not due to anything in SLURM. These times have improved considerably on the
+more recent BlueGene/P and BlueGene/Q systems.</p>
-<p>When using smap in default output mode you can scroll through
+<p>When using <i>smap</i> in default output mode you can scroll through
the different windows using the arrow keys.
The <b>up</b> and <b>down</b> arrow keys scroll
the window containing the grid, and the <b>left</b> and <b>right</b> arrow
@@ -197,63 +260,151 @@
<p class="footer"><a href="#top">top</a></p>
-<h2>System Administration</h2>
+<h2>System Administration for BlueGene/Q only</h2>
+
+<p><b>IMPORTANT:</b> The SlurmUser defined in the slurm.conf must be
+added to the bgadmin group. This allows the slurmctld to access
+information from the system and manipulate blocks.
+</p>
+
+<p>In order to make <i>srun</i> operate correctly with the underlying system
+and to ensure security for new MPI jobs, it is necessary to enable the
+SLURM plugin for the IBM runjob_mux. This is
+done by altering the bg.properties file. In the [runjob.mux]
+section of the bg.properties file change the plugin option to
+<i>$prefix/lib/slurm/runjob_plugin.so</i> and also set the plugin_flags
+option to <i>0x0109</i> (RTLD_LAZY | RTLD_GLOBAL | RTLD_DEEPBIND)
+which allows the forwarding of symbols to shared objects like SLURM
+uses for plugins.</p>
+<pre>
+[runjob.mux]
+...
+plugin = /usr/lib64/slurm/runjob_plugin.so
+ # Path to the plugin used for communicating with a
+ # job scheduler. This value can be updated by the
+ # runjob_mux_refresh_config command on the
+ # Login Node where a runjob_mux process runs.
+...
+plugin_flags = 0x0109 # RTLD_LAZY | RTLD_GLOBAL | RTLD_DEEPBIND
+</pre>
+
+<p>You also need to set things up so the runjob_mux is run by the
+ SlurmUser. This can be done by editing 2 files.</p>
+<p>Back in your bg.properties file alter the [master.user] section.
+<pre>
+[master.user]
+...
+runjob_mux=<b>slurm</b>
+</pre>
+<p>Then in /etc/init.d/bgagent add SlurmUser to the --users line.</p>
+<pre>
+OPTIONS="--users bgqadmin,bgws,bgqsysdb,<b>slurm</b>"
+</pre>
+<p>After these settings are set flush the runjob_server and (re)start
+ each runjob_mux running on your system.</p>
+<pre>
+> /bgsys/drivers/ppcfloor/sbin/master_stop binaries
+stopped
+> sudo /etc/init.d/bgagent restart
+Shutting down bgagentd: [ OK ]
+Starting bgagentd:
+Startup of bgagentd completed: [ OK ]
+> /bgsys/drivers/ppcfloor/sbin/master_start binaries
+> /bgsys/drivers/ppcfloor/sbin/bgmaster_server_refresh_config
+success!
+> /bgsys/drivers/ppcfloor/sbin/master_start runjob_mux
+started runjob_mux
+> ps aux | grep runjob_mux
+<b>slurm</b> 25461 0.0 0.3 518528 48064 ? Sl 13:00 0:00 runjob_mux
+</pre>
+
+<p>When a new version of SLURM is installed it is a wise idea to "refresh" the
+runjob_mux with the new plugin. This can be done in one of two ways.
+<ul>
+<li>Stopping and restarting the runjob_mux. While this
+ option works every time, jobs running under the runjob_mux will not
+ survive, so plan your updates accordingly.
+<pre>
+> /bgsys/drivers/ppcfloor/sbin/master_stop runjob_mux
+stopped runjob_mux
+> /bgsys/drivers/ppcfloor/sbin/master_start runjob_mux
+started runjob_mux
+</pre>
+</li>
+<li><b>WARNING! You need at least IBM driver V1R1M1 efix 008 or this
+method will not work. Previous versions would load the old
+plugin (presumably still in memory) rather than the new one. As of SLURM
+2.4.2 when the plugin is loaded it will print the version of SLURM,
+which should let you know if the new one is loaded or not.</b>
+<p>This method allows no job loss using the
+IBM <i>runjob_mux_refresh_config</i> command. This should
+reload the plugin and all should be good afterwards. After doing this
+you may see some warning/error messages about the currently running
+jobs not being known when they finish. This is expected and can
+usually be ignored.</li>
+</ul></p>
+
+<b>Notes about sub-block allocations:</b>
+<p>
+There is a current limitation for sub-block jobs and how the system
+(used for I/O) and user (used for MPI) torus class routes are configured. The
+network device hardware has cutoff registers to prevent packets from flowing
+outside of the sub-block. Unfortunately, when the sub-block has a size 3,
+the job can attempt to send user packets outside of its sub-block. This causes
+it to be terminated by signal 36. To prevent this from happening SLURM does
+not allow a sub-block to be used with any dimension of 3.
+</p><p>
+The current IBM API does not allow wrapping inside a midplane.
+This means you cannot create a sub-block of 2 with nodes in the 0 and 3 position.
+SLURM will support this in the future when the underlying system allows it.
+</p>
+
+<p class="footer"><a href="#top">top</a></p>
+
+<h2>System Administration for all BlueGene/L Systems</h2>
<p>Building a BlueGene compatible system is dependent upon the
<i>configure</i> program locating some expected files.
In particular for a BlueGene/L system, the configure script searches
-for <i>libdb2.so</i> in the directories <i>/home/bgdb2cli/sqllib</i>
-and <i>/u/bgdb2cli/sqllib</i>. If your DB2 library file is in a
-different location, use the configure
+for <i>libdb2.so</i> in the
+directories <i>/bgl/BlueLight/ppcfloor/bglsys</i>, <i>/opt/IBM/db2/V8.1</i>
+<i>/home/bgdb2cli/sqllib</i> and <i>/u/bgdb2cli/sqllib</i>. If your
+DB2 library file is in a different location, use the configure
option <i>--with-db2-dir=PATH</i> to specify the parent directory.
-If you have the same version of the operating system on both the
-Service Node (SN) and the Front End Nodes (FEN) then you can configure
-and build one set of files on the SN and install them on both the SN and FEN.
-Note that all smap functionality will be provided on the FEN
-except for the ability to map SLURM node names to and from
-row/rack/midplane data, which requires direct use of the Bridge API
-calls only available on the SN.</p>
+This option does not apply to any other BlueGene arch.</p>
-<p>If you have different versions of the operating system on the SN and FEN
-(as was the case for some early system installations), then you will need
-to configure and build two sets of files for installation.
-One set will be for the Service Node (SN), which has direct access to the
-Bridge APIs.
-The second set will be for the Front End Nodes (FEN), which lack access to the
-Bridge APIs and interact with using Remote Procedure Calls to the slurmctld
-daemon.
-You should see "#define HAVE_BG 1" and "#define HAVE_FRONT_END 1" in the "config.h"
-file for both the SN and FEN builds.
-You should also see "#define HAVE_BG_FILES 1" in config.h on the SN before
-building SLURM. </p>
+<h2>System Administration for all BlueGene Systems</h2>
-<p>The slurmctld daemon should execute on the system's service node.
+<p>The <i>slurmctld</i> daemon should execute on the system's service node.
If an optional backup daemon is used, it must be in some location where
it is capable of executing Bridge APIs.
-The slurmd daemons executes the user scripts and there must be at least one
-front end node configured for this purpose. Multiple front end nodes may be
-configured for slurmd use to improve performance and fault tolerance.
-Each slurmd can execute jobs for every base partition and the work will be
-distributed among the slurmd daemons to balance the workload.
-You can use the scontrol command to drain individual compute nodes as desired
-and return them to service.</p>
+The <i>slurmd</i> daemons execute the user scripts and there must be
+at least one front end node configured for this purpose. Multiple
+front end nodes may be configured for <i>slurmd</i> use to improve
+performance and fault tolerance. Each <i>slurmd</i> can execute
+jobs for every midplane and the work will be distributed among
+the <i>slurmd</i> daemons to balance the workload. You can use
+the <i>scontrol</i> command to drain individual compute nodes as
+desired and return them to service.</p>
<p>The <i>slurm.conf</i> (configuration) file needs to have the value of
<i>InactiveLimit</i> set to zero or not specified (it defaults to a value of zero).
-This is because if there are no job steps, we don't want to purge jobs prematurely.
-The value of <i>SelectType</i> must be set to "select/bluegene" in order to have
+This is because we don't want to purge jobs prematurely if there are no job steps.
+The value of <i>SelectType</i> must be set to "select/bluegene" (which
+happens automatically) in order to have
node selection performed using a system aware of the system's topography
and interfaces.
The value of <i>Prolog</i> should be set to the full pathname of a program that
-will delay execution until the bgblock identified by the MPIRUN_PARTITION
-environment variable is ready for use. It is recommended that you construct a script
+will delay execution until the job's block is ready for
+use by the user running the job. It is recommended that you construct a script
that serves this function and calls the supplied program <i>sbin/slurm_prolog</i>.
The value of <i>Epilog</i> should be set to the full pathname of a program that
-will wait until the bgblock identified by the MPIRUN_PARTITION environment
-variable is no longer usable by this job. It is recommended that you construct a script
-that serves this function and calls the supplied program <i>sbin/slurm_epilog</i>.
+will wait until the job's block has relinquished the resources acquired
+by the job and is no longer usable by this job. It is recommended that
+you construct a script that serves this function and calls the
+supplied program <i>sbin/slurm_epilog</i>.
The prolog and epilog programs are used to insure proper synchronization
-between the slurmctld daemon, the user job, and MMCS.
+between the <i>slurmctld</i> daemon, the user job, and MMCS.
A multitude of other functions may also be placed into the prolog and
epilog as desired (e.g. enabling/disabling user logins, purging file systems,
etc.). Sample prolog and epilog scripts follow. </p>
@@ -262,7 +413,7 @@
#!/bin/bash
# Sample BlueGene Prolog script
#
-# Wait for bgblock to be ready for this job's use
+# Wait for block to be ready for this job's use
/usr/sbin/slurm_prolog
</pre>
@@ -271,53 +422,53 @@
# Sample BlueGene Epilog script
#
# Cancel job to start the termination process for this job
-# and release the bgblock
+# and release the block
/usr/bin/scancel $SLURM_JOB_ID
#
-# Wait for bgblock to be released from this job's use
+# Wait for block to be released from this job's use
/usr/sbin/slurm_epilog
</pre>
<p>Since jobs with different geometries or other characteristics might not
interfere with each other, scheduling is somewhat different on a BlueGene
-system than typical clusters.
-SLURM's builtin scheduler on BlueGene will sort pending jobs and then attempt
-to schedule <b>all</b> of them in priority order.
-This essentially functions as if there is a separate queue for each job size.
-SLURM's backfill scheduler on BlueGene will enforce FIFO (first-in first-out)
-scheduling with backfill (lower priority jobs will start early if doing so
-will not impact the expected initiation time of a higher priority job).
-As on other systems, effective backfill relies upon users setting reasonable
-job time limits.
-Note that SLURM does support different partitions with an assortment of
+system than typical clusters.</p>
+
+<p>Starting in 2.4.3 SchedType=sched/backfill works in all modes and
+for all job sizes. Before this release there were issues backfilling
+jobs smaller than a midplane. It is encouraged to upgrade to at least
+2.4.3 for better backfill behavior.</p>
+
+<p>SLURM does support different partitions with an assortment of
different scheduling parameters.
For example, SLURM can have defined a partition for full system jobs that
is enabled to execute jobs only at certain times; while a default partition
could be configured to execute jobs at other times.
Jobs could still be queued in a partition that is configured in a DOWN
state and scheduled to execute when changed to an UP state.
-Base partitions can also be moved between slurm partitions either by changing
-the <i>slurm.conf</i> file and restarting the slurmctld daemon or by using
-the scontrol reconfig command. </p>
+Midplanes can also be moved between SLURM partitions either by changing
+the <i>slurm.conf</i> file and restarting the <i>slurmctld</i> daemon or by using
+the <i>scontrol</i> reconfig command. </p>
<p>SLURM node and partition descriptions should make use of the
<a href="#naming">naming</a> conventions described above. For example,
-"NodeName=bg[000x733] CPUs=1024"
-is used in <i>slurm.conf</i> to define a BlueGene system with 128 midplanes
-in an 8 by 4 by 4 matrix and each midplane is configured with 1024 processors
-(cores).
-The node name prefix of "bg" defined by NodeName can be anything you want,
-but needs to be consistent throughout the <i>slurm.conf</i> file.
-No computer is actually expected to a hostname of "bg000" and no
-attempt will be made to route message traffic to this address.</p>
+"NodeName=bg[000x733]"
+is used in <i>slurm.conf</i> to define a BlueGene/L system with 128 midplanes
+in an 8 by 4 by 4 matrix. The node name prefix of "bg" defined by
+NodeName can be anything you want, but needs to be consistent
+throughout the <i>slurm.conf</i> file. No computer is actually
+expected to have a hostname of "bg000" and no attempt will be made to route
+message traffic to this address. Starting in version 2.4, SLURM can determine
+how many Sockets, CoresPerSocket, and ThreadsPerCore are available on each
+midplane, so no configuration is needed to determine how many cores
+are on each midplane.</p>
-<p>Front end nodes used for executing the slurmd daemons must also be defined
+<p>Front end nodes used for executing the <i>slurmd</i> daemons must also be defined
in the <i>slurm.conf</i> file.
It is recommended that at least two front end nodes be dedicated to use by
-the slurmd daemons for fault tolerance.
+the <i>slurmd</i> daemons for fault tolerance.
For example:
"FrontendName=frontend[00-03] State=UNKNOWN"
-is used to define four front end nodes for running slurmd daemons.</p>
+is used to define four front end nodes for running <i>slurmd</i> daemons.</p>
<pre>
# Portion of slurm.conf for BlueGene system
@@ -327,143 +478,145 @@
Epilog=/usr/sbin/epilog
#
FrontendName=frontend[00-01] State=UNKNOWN
-NodeName=bg[000x733] CPUs=1024 State=UNKNOWN
+NodeName=bg[000x733] State=UNKNOWN
</pre>
-<p>While users are unable to initiate SLURM job steps on BlueGene/L or BlueGene/P
-systems, this restriction does not apply to user root or <i>SlurmUser</i>.
-Be advised that the slurmd daemon is unable to manage a large number of job
-steps, so this ability should be used only to verify normal SLURM operation.
-If large numbers of job steps are initiated by slurmd, expect the daemon to
-fail due to lack of memory or other resources.
-It is best to minimize other work on the front end nodes executing slurmd
+<p>It is best to minimize other work on the front end nodes executing <i>slurmd</i>
so as to maximize its performance and minimize other risk factors.</p>
-<a name="bluegene-conf"><h2>Bluegene.conf File Creation</h2></a>
+<a name="bluegene-conf"><h2>bluegene.conf File Creation</h2></a>
<p>In addition to the normal <i>slurm.conf</i> file, a new
<i>bluegene.conf</i> configuration file is required with information pertinent
to the system.
Put <i>bluegene.conf</i> into the SLURM configuration directory with
<i>slurm.conf</i>.
A sample file is installed in <i>bluegene.conf.example</i>.
-System administrators should use the <i>smap</i> tool to build appropriate
-configuration file for static partitioning.
+If a system administrator chooses against dynamic partitioning for
+some reason, they should use the <i>smap</i> tool to build an appropriate
+configuration file for static/overlap partitioning.
Note that <i>smap -Dc</i> can be run without the SLURM daemons
active to establish the initial configuration.
-Note that the bgblocks defined using smap may not overlap (except for the
-full-system bgblock, which is implicitly created).
-See the smap man page for more information.</p>
+Note when using static partitioning the blocks defined
+using <i>smap</i> may not overlap (except for the full-system block,
+which is implicitly created).
+See the <i>smap</i> man page for more information.</p>
<p>There are 3 different modes which the system administrator can define
-BlueGene partitions (or bgblocks) available to execute jobs: static,
+BlueGene partitions (or blocks) available to execute jobs: static,
overlap, and dynamic.
-Jobs must then execute in one of the created bgblocks.
-(<b>NOTE:</b> bgblocks are unrelated to SLURM partitions.)</p>
+Jobs must then execute in one of the created blocks.
+(<b>NOTE:</b> blocks are unrelated to SLURM partitions.)</p>
<p>The default mode of partitioning is <i>static</i>.
In this mode, the system administrator must explicitly define each
-of the bgblocks in the <i>bluegene.conf</i> file.
-Each of these bgblocks are explicitly configured with either a
+of the blocks in the <i>bluegene.conf</i> file.
+Each of these blocks are explicitly configured with either a
mesh or torus interconnect.
They must also not overlap, except for the implicitly defined full-system
-bgblock.
-Note that bgblocks are not rebooted between jobs in the mode
+block.
+Note that blocks are not rebooted between jobs in the mode
except when going to/from full-system jobs.
-Eliminating bgblock booting can significantly improve system
+Eliminating block booting can significantly improve system
utilization (eliminating boot time) and reliability.</p>
<p>The second mode is <i>overlap</i> partitioning.
Overlap partitioning is very similar to static partitioning in that
-each bgblocks must be explicitly defined in the <i>bluegene.conf</i>
+each block must be explicitly defined in the <i>bluegene.conf</i>
file, but these partitions can overlap each other.
-In this mode <b>it is highly recommended that none of the bgblocks
+In this mode <b>it is highly recommended that none of the blocks
have any passthroughs in the X-dimension associated to them</b>.
Usually this is only an issue on larger BlueGene systems.
<b>It is advisable to use this mode with extreme caution.</b>
-Make sure you know what you doing to assure the bgblocks will
-boot without dependency on the state of any base partition
-not included the bgblock.</p>
+Make sure you know what you are doing to assure the blocks will
+boot without dependency on the state of any midplane
+not included in the block.</p>
-<p>In the two previous modes you must insure that the base
-partitions defined in <i>bluegene.conf</i> are consistent with
+<p>In the two previous modes you must insure that the midplanes
+defined in <i>bluegene.conf</i> are consistent with
those defined in <i>slurm.conf</i>.
Note the <i>bluegene.conf</i> file contains only the numeric
-coordinates of base partitions while <i>slurm.conf</i> contains
+coordinates of midplanes while <i>slurm.conf</i> contains
the name prefix in addition to the numeric coordinates.</p>
<p>The final mode is <i>dynamic</i> partitioning.
-Dynamic partitioning was developed primarily for smaller BlueGene systems,
-but can be used on larger systems.
-Dynamic partitioning may introduce fragmentation of resources.
-This fragmentation may be severe since SLURM will run a job anywhere
-resources are available with little thought of the future.
-As with overlap partitioning, <b>use dynamic partitioning with
-caution!</b>
-This mode can result in job starvation since smaller jobs will run
-if resources are available and prevent larger jobs from running.
-Bgblocks need not be assigned in the <i>bluegene.conf</i> file
+While dynamic partitioning was developed primarily for smaller
+BlueGene systems, it is commonly used on larger systems.
+A warning about dynamic partitioning is that it may introduce
+fragmentation of resources. Dynamic partitioning is very capable,
+easy to set up, and is the default for many systems including LLNL's
+Sequoia. With the advent of sub-block allocations (see
+AllowSubBlockAllocation option in 'man bluegene.conf') fragmentation has
+become less of a concern.
+Blocks need not be assigned in the <i>bluegene.conf</i> file
for this mode.</p>
-<p>Blocks can be freed or set in an error state with scontrol,
-(i.e. "<i>scontrol update BlockName=RMP0 state=error</i>").
-This will end any job on the block and set the state of the block to ERROR
+<p>Blocks can be freed or set in an error state using the <i>scontrol</i>
+command (i.e. "<i>scontrol update BlockName=RMP0 state=error</i>").
+This will terminate any job on the block and set the state of the block to ERROR
making it so no job will run on the block. To set it back to a usable
-state, set the state to free (i.e.
-"<i>scontrol update BlockName=RMP0 state=free</i>").
+state, you can resume the block with the <i>scontrol</i> option state=resume
+(i.e. "<i>scontrol update BlockName=RMP0 state=resume</i>"). This is useful
+if you temporarily put the block in an error state and the block is
+really booted and ready to start jobs. You can also put the block
+in free state using the state=free option. Valid states are Error, Free,
+Recreate, Remove, Resume.</p>
-<p>Alternatively, if only part of a base partition needs to be put
+<p>Alternatively, if only part of a midplane needs to be put
into an error state which isn't already in a block of the size you
-need, you can set a collection of IO nodes into an error state using scontrol
-(i.e. "<i>scontrol update subbpname=bg000[0-3] state=error</i>").
+need, you can set a collection of IO nodes into an error state using
+<i>scontrol</i> (i.e. "<i>scontrol update submpname=bg000[0-3]
+ state=error</i>").
+<b>NOTE:</b> Even on BGQ where node names are given in bg0000[00000] format
+this option takes an ionode name bg0000[0].
+
This will end any job on the nodes listed, create a block there, and set
the state of the block to ERROR making it so no job will run on the
-block. To set it back to a usable state set the state to free (i.e.
-"<i>scontrol update BlockName=RMP0 state=free</i>" or
- "<i>scontrol update subbpname=bg000[0-3] state=free</i>"). This is
- helpful to allow other jobs to run on the unaffected nodes in
- the base partition.
-
+block. Then resume the block when it is ready to be used again (i.e.
+"<i>scontrol update BlockName=RMP0 state=resume</i>"). This is
+helpful to allow other jobs to run on the unaffected nodes in
+the midplane.</p>
<p>One of these modes must be defined in the <i>bluegene.conf</i> file
with the option <i>LayoutMode=MODE</i> (where MODE=STATIC, DYNAMIC or OVERLAP).</p>
-<p>The number of c-nodes in a base partition and in a node card must
+<p>The number of c-nodes in a midplane and in a node card must
be defined.
-This is done using the keywords <i>BasePartitionNodeCnt=NODE_COUNT</i>
+This is done using the keywords <i>MidplaneNodeCnt=NODE_COUNT</i>
and <i>NodeCardNodeCnt=NODE_COUNT</i> respectively in the <i>bluegene.conf</i>
-file (i.e. <i>BasePartitionNodeCnt=512</i> and <i>NodeCardNodeCnt=32</i>).</p>
+file (i.e. <i>MidplaneNodeCnt=512</i> and <i>NodeCardNodeCnt=32</i>).</p>
-<p>Note that the <i>Numpsets</i> values defined in
-<i>bluegene.conf</i> is used only when SLURM creates bgblocks this
-determines if the system is IO rich or not. For most BlueGene/L
+<p>Note that the <i>IONodesPerMP</i> value defined in
+<i>bluegene.conf</i> represents how many ionodes are on each midplane.
+SLURM does not support heterogeneous ionode configurations so if your
+environment is like this, place the smallest number here. For most BlueGene/L
systems this value is either 8 (for IO poor systems) or 64 (for IO rich
-systems).</p>
+systems). For BlueGene/Q systems 4 to 16 are most common.</p>
<p>The <i>Images</i> file specifications identify which images are used when
-booting a bgblock and the valid images are different for each BlueGene system
+booting a block and the valid images are different for each BlueGene system
type (e.g. L, P and Q). Their values can change during job allocation based on
input from the user.
-If you change the bgblock layout, then slurmctld and slurmd should
+If you change the block layout, then <i>slurmctld</i> and <i>slurmd</i> should
both be cold-started (without preserving any state information,
"/etc/init.d/slurm startclean").</p>
-<p>If you wish to modify the <i>Numpsets</i> values
-for existing bgblocks, either modify them manually or destroy the bgblocks
+<p>If you wish to modify the <i>IONodesPerMP</i> value after blocks
+have already been created, either modify the blocks manually or destroy them
and let SLURM recreate them.
-Note that in addition to the bgblocks defined in <i>bluegene.conf</i>, an
-additional bgblock is created containing all resources defined
-all of the other defined bgblocks.
+Note that in addition to the blocks defined in <i>bluegene.conf</i>, an
+additional block is created containing all resources defined
+all of the other defined blocks.
Make use of the SLURM partition mechanism to control access to these
-bgblocks.
-A sample <i>bluegene.conf</i> file is shown below.
+blocks.
+A sample <i>bluegene.conf</i> file is shown below.</p>
<pre>
###############################################################################
# Global specifications for a BlueGene/L system
#
-# BlrtsImage: BlrtsImage used for creation of all bgblocks.
-# LinuxImage: LinuxImage used for creation of all bgblocks.
-# MloaderImage: MloaderImage used for creation of all bgblocks.
-# RamDiskImage: RamDiskImage used for creation of all bgblocks.
+# BlrtsImage: BlrtsImage used for creation of all blocks.
+# LinuxImage: LinuxImage used for creation of all blocks.
+# MloaderImage: MloaderImage used for creation of all blocks.
+# RamDiskImage: RamDiskImage used for creation of all blocks.
#
# You may add extra images which a user can specify from the srun
# command line (see man srun). When adding these images you may also add
@@ -475,15 +628,14 @@
# AltMloaderImage: Alternative MloaderImage(s).
# AltRamDiskImage: Alternative RamDiskImage(s).
#
-# LayoutMode: Mode in which slurm will create blocks:
-# STATIC: Use defined non-overlapping bgblocks
-# OVERLAP: Use defined bgblocks, which may overlap
-# DYNAMIC: Create bgblocks as needed for each job
-# BasePartitionNodeCnt: Number of c-nodes per base partition
+# LayoutMode: Mode in which SLURM will create blocks:
+# STATIC: Use defined non-overlapping blocks
+# OVERLAP: Use defined blocks, which may overlap
+# DYNAMIC: Create blocks as needed for each job
+# MidplaneNodeCnt: Number of c-nodes per midplane
# NodeCardNodeCnt: Number of c-nodes per node card.
-# Numpsets: The Numpsets used for creation of all bgblocks
-# equals this value multiplied by the number of
-# base partitions in the bgblock.
+# IONodesPerMP: Number of ionodes per midplane, needed to
+#                       determine smallest creatable block.
#
# BridgeAPILogFile: Pathname of file in which to write the
# Bridge API logs.
@@ -526,20 +678,20 @@
AltRamDiskImage=* Groups=da,adamb
LayoutMode=STATIC
-BasePartitionNodeCnt=512
+MidplaneNodeCnt=512
NodeCardNodeCnt=32
-NumPsets=64 # An I/O rich environment
+IONodesPerMP=64 # An I/O rich environment
BridgeAPILogFile=/var/log/slurm/bridgeapi.log
BridgeAPIVerbose=0
#DenyPassthrough=X,Y,Z
###############################################################################
-# Define the static/overlap partitions (bgblocks)
+# Define the static/overlap partitions (blocks)
#
-# BPs: The base partitions (midplanes) in the bgblock using XYZ coordinates
+# BPs: The midplanes in the block using XYZ coordinates
# Type: Connection type "MESH" or "TORUS" or "SMALL", default is "TORUS"
-# Type SMALL will divide a midplane into multiple bgblocks
+# Type SMALL will divide a midplane into multiple blocks
# based off options NodeCards and Quarters to determine type of
# small blocks.
#
@@ -547,109 +699,106 @@
# * Ordering is very important for laying out switch wires. Please create
# blocks with smap, and once done don't move the order of blocks
# created.
-# * A bgblock is implicitly created containing all resources on the system
-# * Bgblocks must not overlap (except for implicitly created bgblock)
+# * A block is implicitly created containing all resources on the system
+# * Blocks must not overlap (except for implicitly created block)
# This will be the case when smap is used to create a configuration file
-# * All Base partitions defined here must also be defined in the slurm.conf file
-# * Define only the numeric coordinates of the bgblocks here. The prefix
+# * All midplanes defined here must also be defined in the slurm.conf file
+# * Define only the numeric coordinates of the blocks here. The prefix
# will be based upon the name defined in slurm.conf
###############################################################################
-# LEAVE NEXT LINE AS A COMMENT, Full-system bgblock, implicitly created
+# LEAVE NEXT LINE AS A COMMENT, Full-system block, implicitly created
# BPs=[000x001] Type=TORUS # 1x1x2 = 2 midplanes
###############################################################################
# volume = 1x1x1 = 1
BPs=[000x000] Type=TORUS # 1x1x1 = 1 midplane
BPs=[001x001] Type=SMALL 32CNBlocks=4 128CNBlocks=3 # 1x1x1 = 4-Nodecard sized
- # c-node blocks 3-Base
- # Partition Quarter sized
- # c-node blocks
+ # c-node blocks 3-Base
+ # Partition Quarter sized
+ # c-node blocks
+</pre>
-</pre></p>
-
-<p>The above <i>bluegene.conf</i> file defines multiple bgblocks to be
+<p>The above <i>bluegene.conf</i> file defines multiple blocks to be
created in a single midplane (see the "SMALL" option).
Using this mechanism, up to 32 independent jobs each consisting of
32 c-nodes can be executed
simultaneously on a one-rack BlueGene system.
-If defining bgblocks of <i>Type=SMALL</i>, the SLURM partition
+If defining blocks of <i>Type=SMALL</i>, the SLURM partition
containing them as defined in <i>slurm.conf</i> must have the
parameter <i>Shared=force</i> to enable scheduling of multiple
jobs on what SLURM considers a single node.
-SLURM partitions that do not contain bgblocks of <i>Type=SMALL</i>
+SLURM partitions that do not contain blocks of <i>Type=SMALL</i>
may have the parameter <i>Shared=no</i> for a slight improvement in
scheduler performance.
As in all SLURM configuration files, parameters and values
are case insensitive.</p>
-<p>The valid image names on a BlueGene/P system are CnloadImage, MloaderImage,
-and IoloadImage. The only image name on BlueGene/Q systems is MloaderImage.
-Alternate images may be specified as described above for all BlueGene system
-types.</p>
+<p>The valid image names on a BlueGene/P system are <i>CnloadImage</i>,
+<i>MloaderImage</i>, and <i>IoloadImage</i>. The only image name on BlueGene/Q
+systems is <i>MloaderImage</i>. Alternate images may be specified as described
+above for all BlueGene system types.</p>
-<p>One more thing is required to support SLURM interactions with
-the DB2 database (at least as of the time this was written).
-DB2 database access is required by the slurmctld daemon only.
-All other SLURM daemons and commands interact with DB2 using
-remote procedure calls, which are processed by slurmctld.
+<p>On BlueGene/L and BlueGene/P systems DB2 database access is required by
+the <i>slurmctld</i> daemon. All other SLURM daemons and commands
+interact with DB2 using remote procedure calls, which are processed
+by <i>slurmctld</i>.
DB2 access is dependent upon the environment variable
<i>BRIDGE_CONFIG_FILE</i>.
Make sure this is set appropriate before initiating the
-slurmctld daemon.
+<i>slurmctld</i> daemon.
If desired, this environment variable and any other logic
can be executed through the script <i>/etc/sysconfig/slurm</i>,
which is automatically executed by <i>/etc/init.d/slurm</i>
prior to initiating the SLURM daemons.</p>
-<p>When slurmctld is initially started on an idle system, the bgblocks
+<p>When <i>slurmctld</i> is initially started on an idle system, the blocks
already defined in MMCS are read using the Bridge APIs.
-If these bgblocks do not correspond to those defined in the <i>bluegene.conf</i>
-file, the old bgblocks with a prefix of "RMP" are destroyed and new ones
+If these blocks do not correspond to those defined in the <i>bluegene.conf</i>
+file, the old blocks with a prefix of "RMP" are destroyed and new ones
created.
-When a job is scheduled, the appropriate bgblock is identified,
+When a job is scheduled, the appropriate block is identified,
its user set, and it is booted.
-Node use (virtual or coprocessor) is set from the mpirun command line now,
+On BlueGene/L and BlueGene/P systems Node use (virtual or coprocessor)
+is set from the mpirun command line.
SLURM has nothing to do with setting the node use.
-Subsequent jobs use this same bgblock without rebooting by changing
+Subsequent jobs use this same block without rebooting by changing
the associated user field.
-The only time bgblocks should be freed and rebooted, in normal operation,
+The only time blocks should be freed and rebooted, in normal operation,
is when going to or from full-system
-jobs (two or more bgblocks sharing base partitions can not be in a
+jobs (two or more blocks sharing midplanes can not be in a
ready state at the same time).
When this logic became available at LLNL, approximately 85 percent of
-bgblock boots were eliminated and the overhead of job startup went
+block boots were eliminated and the overhead of job startup went
from about 24% to about 6% of total job time.
-Note that bgblocks will remain in a ready (booted) state when
+Note that blocks will remain in a ready (booted) state when
the SLURM daemons are stopped.
This permits SLURM daemon restarts without loss of running jobs
-or rebooting of bgblocks. </p>
+or rebooting of blocks. </p>
-<p>Be aware that SLURM will issue multiple bgblock boot requests as
+<p>Be aware that SLURM will issue multiple block boot requests as
needed (e.g. when the boot fails).
-If the bgblock boot requests repeatedly fail, SLURM will configure
-the failing base partitions to a DRAINED state so as to avoid continuing
+If the block boot requests repeatedly fail (>3 times), SLURM will configure
+the failing block to an ERROR state so as to avoid continuing
repeated reboots and the likely failure of user jobs.
A system administrator should address the problem before returning
-the base partitions to service.</p>
+the block to service with scontrol.</p>
-<p>If the slurmctld daemon is cold-started (<b>/etc/init.d/slurm startclean</b>
-or <b>slurmctld -c</b>) it is recommended that the slurmd daemon(s) be
+<p>If the <i>slurmctld</i> daemon is cold-started (<i>/etc/init.d/slurm startclean</i>
+or <i>slurmctld -c</i>) it is recommended that the <i>slurmd</i> daemon(s) be
cold-started at the same time.
-Failure to do so may result in errors being reported by both slurmd
-and slurmctld due to bgblocks that previously existed being deleted.</p>
-
-<p>A new tool <i>sfree</i> has also been added to help system
-administrators free a bgblock on request (i.e.
-"<i>sfree --bgblock=<blockname></i>").
-Run <i>sfree --help</i> for more information.</p>
+Failure to do so may result in errors being reported by both <i>slurmd</i>
+and <i>slurmctld</i> due to blocks that previously existed being deleted.</p>
<h4>Resource Reservations</h4>
<p>SLURM's advance reservation mechanism can accept a node count specification
-as input rather than identification of specific nodes/midplanes. In that case,
-SLURM may reserve nodes/midplanes which may not be formed into an appropriate
-bgblock. Work is planned for SLURM version 2.4 to remedy this problem. Until
-that time, identifying the specific nodes/midplanes to be included in an
-advanced reservation may be necessary.</p>
+as input rather than identification of specific nodes/midplanes. In SLURM
+version 2.4, an attempt will be made to select nodes which can be used to
+create a single block of the specified size. Multiple block sizes can also be
+specified and a reservation will be made that includes those block sizes
+(e.g. <i>scontrol create reservation nodecnt=4k,2k ...</i>). In earlier
+versions of SLURM, the nodes/midplanes selected for a reservation when
+specifying a node count might not be suitable for creating block(s) of the
+desired size(s).</p>
<p>SLURM's advance reservation mechanism is designed to reserve resources
at the level of whole nodes, which on a BlueGene systems would represent
@@ -665,15 +814,15 @@
"<i>Licenses=cnode*512</i>". Then create an advanced reservation with a
command like this:<br>
"<i>scontrol create reservation licenses="cnode*32" starttime=now duration=30:00 users=joe</i>".<br>
-Jobs run in this reservation will then have <b>at least</b> 32 c-nodes
+Jobs run in this reservation will then have <u>at least</u> 32 c-nodes
available for their use, but could use more given an appropriate workload.</p>
<p>There is also a job_submit/cnode plugin available for use that will
automatically set a job's license specification to match its c-node request
(i.e. a command like<br>
"<i>sbatch -N32 my.sh</i>" would automatically be translated to<br>
-"<i>sbatch -N32 --licenses=cnode*32 my.sh</i>" by the slurmctld daemon.
-Enable this plugin in the slurm.conf configuration file with the option
+"<i>sbatch -N32 --licenses=cnode*32 my.sh</i>" by the <i>slurmctld</i> daemon.
+Enable this plugin in the <i>slurm.conf</i> configuration file with the option
"<i>JobSubmitPlugins=cnode</i>".</p>
<h4>Debugging</h4>
@@ -689,26 +838,26 @@
in <i>bluegene.conf</i> which can be configured to contain detailed
information about every Bridge API call issued.</p>
-<p>Note that slurmcltld log messages of the sort
-<i>Nodes bg[000x133] not responding</i> are indicative of the slurmd
-daemon serving as a front-end to those base partitions is not responding (on
-non-BlueGene systems, the slurmd actually does run on the compute
+<p>Note that <i>slurmctld</i> log messages of the sort
+<i>Nodes bg[000x133] not responding</i> are indicative of the <i>slurmd</i>
+daemon serving as a front-end to those midplanes is not responding (on
+non-BlueGene systems, the <i>slurmd</i> actually does run on the compute
nodes, so the message is more meaningful there). </p>
<p>Note that you can emulate a BlueGene/L system on stand-alone Linux
system.
-Run <b>configure</b> with the <b>--enable-bgl-emulation</b> option.
-This will define "HAVE_BG", "HAVE_BGL", and "HAVE_FRONT_END" in the
-config.h file.
+Run <i>configure</i> with the <i>--enable-bgl-emulation</i> option.
+This will define "HAVE_BG", "HAVE_BGL", "HAVE_BG_L_P", and
+"HAVE_FRONT_END" in the config.h file.
You can also emulate a BlueGene/P system with
-the <b>--enable-bgp-emulation</b> option.
-This will define "HAVE_BG", "HAVE_BGP", and "HAVE_FRONT_END" in the
-config.h file.
+the <i>--enable-bgp-emulation</i> option.
+This will define "HAVE_BG", "HAVE_BGP", "HAVE_BG_L_P", and
+"HAVE_FRONT_END" in the config.h file.
You can also emulate a BlueGene/Q system using
-the <b>--enable-bgq-emulation</b> option.
+the <i>--enable-bgq-emulation</i> option.
This will define "HAVE_BG", "HAVE_BGQ", and "HAVE_FRONT_END" in the
config.h file.
-Then execute <b>make</b> normally.
+Then execute <i>make</i> normally.
These variables will build the code as if it were running
on an actual BlueGene computer, but avoid making calls to the
Bridge library (that is controlled by the variable "HAVE_BG_FILES",
@@ -717,6 +866,6 @@
<p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 16 August 2011</p>
+<p style="text-align:center;">Last modified 8 August 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/cgroups.shtml b/doc/html/cgroups.shtml
new file mode 100644
index 0000000..44899f7
--- /dev/null
+++ b/doc/html/cgroups.shtml
@@ -0,0 +1,176 @@
+<!--#include virtual="header.txt"-->
+
+<h1>Cgroups Guide</h1>
+<h2>Cgroups Overview</h2>
+For a comprehensive description of Linux Control Groups (cgroups) see the
+<a href="http://www.kernel.org/doc/Documentation/cgroups/cgroups.txt">
+cgroups documentation</A> at kernel.org. Detailed knowledge of cgroups is not
+required to use cgroups in SLURM, but a basic understanding of the
+following features of cgroups is helpful:
+<ul>
+<li><b>Cgroup</b> - a container for a set of processes subject to common
+controls or monitoring, implemented as a directory and a set of files
+(state objects) in the cgroup
+virtual filesystem.</li>
+<li><b>Subsystem</b> - a module, typically a resource controller, that applies
+a set of parameters to the cgroups in a hierarchy.</li>
+<li><b>Hierarchy</b> - a set of cgroups organized in a tree structure, with one
+or more associated subsystems.</li>
+<li><b>State Objects</b> - pseudofiles that represent the state of a cgroup or
+apply controls to a cgroup:
+<ul>
+<li><i>tasks</i> - identifies the processes (PIDs) in the cgroup.
+<li><i>release_agent</i> - specifies the location of the script or program to
+be called when the cgroup becomes empty.</li>
+<li><i>notify_on_release</i> - controls whether the release_agent is called for
+the cgroup.</li>
+<li>additional state objects specific to each subsystem.</li>
+</ul>
+</ul>
+<br>
+<h2>Use of Cgroups in SLURM</h2>
+SLURM provides cgroup versions of a number of plugins.
+<ul>
+<li>proctrack (process tracking)</li>
+<li>task (task management)</li>
+<li>jobacct_gather (job accounting statistics)</li>
+The cgroup plugins can provide a number of benefits over the
+other more standard plugins, as described below.
+</ul>
+<br>
+<h2>SLURM Cgroups Configuration Overview</h2>
+There are several sets of configuration options for SLURM cgroups:
+<ul>
+<li><a href="slurm.conf.html">slurm.conf</a> provides options to enable the
+cgroup plugins. Each plugin may be enabled or disabled independently
+of the others.</li>
+<li><a href="cgroup.conf.html">cgroup.conf</a> provides general options that
+are common to all cgroup plugins, plus additional options that apply only to
+specific plugins.</li>
+<li>Additional configuration is required to enable automatic removal of SLURM
+cgroups when they are no longer in use.
+See <a href="#cleanup">Cleanup of SLURM Cgroups</a> below for details.</li>
+</ul>
+<a name="available"></a>
+<br>
+<h2>Currently Available Cgroup Plugins</h2>
+<h3>proctrack/cgroup plugin</h3>
+The proctrack/cgroup plugin is an alternative to other proctrack
+plugins such as proctrack/linux for process tracking and
+suspend/resume capability. proctrack/cgroup uses the freezer subsystem
+which is more reliable for tracking and control than proctrack/linux.
+<p>
+To enable this plugin, configure the following option in slurm.conf:
+<pre>ProctrackType=proctrack/cgroup</pre>
+</p>
+There are no specific options for this plugin in cgroup.conf, but the general
+options apply. See the <a href="cgroup.conf.html">cgroup.conf</a> man page for
+details.
+<h3>task/cgroup plugin</h3>
+The task/cgroup plugin is an alternative other task plugins such as
+task/affinity plugin for task management. task/cgroup provides the
+following features:
+<ul>
+<li>The ability to confine jobs and steps to their allocated cpuset.</li>
+<li>The ability to bind tasks to sockets, cores and threads within their step's
+allocated cpuset on a node.</li>
+<ul>
+<li>Supports block and cyclic distribution of allocated cpus to tasks for
+binding.</li>
+</ul>
+<li>The ability to confine jobs and steps to specific memory resources.</li>
+<li>The ability to confine jobs to their allocated set of generic resources
+(gres devices).</li>
+</ul>
+The task/cgroup plugin uses the cpuset, memory and devices subsystems.
+<p>
+To enable this plugin, configure the following option in slurm.conf:
+<pre>TaskPlugin=task/cgroup</pre>
+</p>
+There are many specific options for this plugin in cgroup.conf. The general
+options also apply. See the <a href="cgroup.conf.html">cgroup.conf</a> man page
+for details.
+<h3>jobacct_gather/cgroup plugin</h3>
+<b>At present, jobacct_gather/cgroup should be considered experimental.</b>
+<p>
+The jobacct_gather/cgroup plugin is an alternative to the jobacct_gather/linux
+plugin for the collection of accounting statistics for jobs, steps and tasks.
+The cgroup plugin may provide improved performance over jobacct_gather/linux.
+jobacct_gather/cgroup uses the cpuacct and memory subsystems. Note: the cpu and
+memory statistics collected by this plugin do not represent the same resources
+as the cpu and memory statistics collected by the jobacct_gather/linux plugin
+(sourced from /proc stat).
+<p>
+To enable this plugin, configure the following option in slurm.conf:
+<pre>JobacctGatherType=jobacct_gather/cgroup</pre>
+</p>
+There are no specific options for this plugin in cgroup.conf, but the general
+options apply. See the <a href="cgroup.conf.html">cgroup.conf</a> man page for
+details.
+<br><br>
+<h2>Organization of SLURM Cgroups</h2>
+SLURM cgroups are organized as follows. A base directory (mount point) is
+created at /cgroup, or as configured by the <i>CgroupMountpoint</I> option in
+<a href="cgroup.conf.html">cgroup.conf</a>. All cgroup
+hierarchies are created below this base directory. A separate hierarchy is
+created for each cgroup subsystem in use. The name of the root cgroup in each
+hierarchy is the subsystem name. A cgroup named <i>slurm</i> is created below
+the root cgroup in each hierarchy. Below each <i>slurm</i> cgroup, cgroups for
+SLURM users, jobs, steps and tasks are created dynamically as needed. The names
+of these cgroups consist of a prefix identifying the SLURM entity (user, job,
+step or task), followed by the relevant numeric id. The following example shows
+the path of the task cgroup in the cpuset hierarchy for taskid#2 of stepid#0 of
+jobid#123 for userid#100, using the default base directory (/cgroup):
+<p><pre>/cgroup/cpuset/slurm/uid_100/job_123/step_0/task_2</pre></p>
+Note that this structure applies to a specific compute node. Jobs that use more
+than one node will have a cgroup structure on each node.
+<a name="cleanup"></a>
+<br><br>
+<h2>Cleanup of SLURM Cgroups</h2>
+Linux provides a mechanism for the automatic removal of a cgroup when its
+state changes from non-empty to empty. A cgroup is empty when no processes are
+attached to it and it has no child cgroups. The SLURM cgroups implementation
+allows this mechanism to be used to automatically remove the relevant SLURM
+cgroups when tasks, steps and jobs terminate. To enable this automatic removal
+feature, follow these steps:
+<ul>
+<li>If desired, configure the location of the SLURM Cgroup release agent
+directory. This is done using the <i>CgroupReleaseAgentDir</i> option in
+<a href="cgroup.conf.html">cgroup.conf</a>.
+The default location is /etc/slurm/cgroup.</li>
+<br>
+<pre>
+ [sulu] (slurm) etc> cat cgroup.conf | grep CgroupReleaseAgentDir
+ CgroupReleaseAgentDir="/etc/slurm/cgroup"
+</pre>
+<li>Create the common release agent file. This file should be named
+<i>release_common</i>. An example script for this file is provided in the
+SLURM delivery at etc/cgroup.release_common.example. The example script will
+automatically remove user, job, step and task cgroups as they become empty. The
+file must have execute permission for root.</li><br>
+<li>Create release agent files for each cgroup subsystem to be used by SLURM.
+This depends on which cgroup plugins are enabled. For example, the
+proctrack/cgroup plugin uses the <i>freezer</i> subsystem. See
+<a href="#available">Currently Available Cgroup Plugins</a> above to find out
+which subsystems are used by each plugin. The name of each release agent file
+must be of the form <i>release_<subsystem name></i>. These files should
+be created as symbolic links to the common release agent file,
+<i>release_common</i>. The files must have execute permission for root. See
+the following example.</li>
+<br>
+<pre>
+ [sulu] (slurm) etc> ls -al /etc/slurm/cgroup
+ total 12
+ drwxr-xr-x 2 root root 4096 2010-04-23 14:55 .
+ drwxr-xr-x 4 root root 4096 2010-07-22 14:48 ..
+ -rwxrwxrwx 1 root root 234 2010-04-23 14:52 release_common
+ lrwxrwxrwx 1 root root 32 2010-04-23 11:04 release_cpuset -> /etc/slurm/cgroup/release_common
+ lrwxrwxrwx 1 root root 32 2010-04-23 11:03 release_freezer -> /etc/slurm/cgroup/release_common
+
+</pre>
+</ul>
+<p class="footer"><a href="#top">top</a></p>
+
+<p style="text-align:center;">Last modified 6 June 2012</p>
+
+<!--#include virtual="footer.txt"-->
diff --git a/doc/html/checkpoint_blcr.shtml b/doc/html/checkpoint_blcr.shtml
index 31281bb..b400f59 100644
--- a/doc/html/checkpoint_blcr.shtml
+++ b/doc/html/checkpoint_blcr.shtml
@@ -28,8 +28,8 @@
</ol>
<p><b>NOTE:</b> checkpoint/blcr cannot restart interactive jobs. It can
-create checkpoints for both interactive and batch steps, but only
-batch jobs can be restarted.</p>
+create checkpoints for both interactive and batch steps, but <b>only
+batch jobs can be restarted.</b></p>
<p><b>NOTE:</b> BLCR operation has been verified with MVAPICH2.
Some other MPI implementations should also work.</p>
@@ -193,6 +193,6 @@
<p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 26 January 2010</p>
+<p style="text-align:center;">Last modified 26 March 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/configurator.easy.html.in b/doc/html/configurator.easy.html.in
new file mode 100644
index 0000000..c904a3d
--- /dev/null
+++ b/doc/html/configurator.easy.html.in
@@ -0,0 +1,551 @@
+<!--
+Copyright (C) 2005-2007 The Regents of the University of California.
+Copyright (C) 2008-2011 Lawrence Livermore National Security.
+Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+Written by Morris Jette <jette1@llnl.gov> and Danny Auble <da@llnl.gov>
+
+This file is part of SLURM, a resource management program.
+For details, see <http://www.schedmd.com/slurmdocs/>.
+
+SLURM is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along
+with SLURM; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+-->
+<HTML>
+<HEAD><TITLE>SLURM System Configuration Tool</TITLE>
+<SCRIPT type="text/javascript">
+<!--
+function get_field(name,form)
+{
+ if (form.value)
+ return name + "=" + form.value
+ return "#" + name + "="
+}
+
+function get_field2(name,form)
+{
+ if (form.value)
+ return name + "=" + form.value
+ return ""
+}
+
+function get_radio_field_skipfirst(name,form)
+{
+ for (var i=1; i < form.length; i++)
+ {
+ if (form[i].checked)
+ {
+ return name + "=" + form[i].value
+ }
+ }
+ return "#" + name + "="
+}
+
+function get_radio_value(form)
+{
+ for (var i=0; i < form.length; i++)
+ {
+ if (form[i].checked)
+ {
+ return form[i].value
+ }
+ }
+}
+
+// When choosing SelectTypeParameters make sure the select_type
+// matches the parameter being checked
+function set_select_type(form)
+{
+ if (form.name == "cons_res_params") {
+ document.config.select_type[0].click() // cons_res
+ } else if (form.name == "linear_params") {
+ document.config.select_type[1].click() // linear
+ } else if (form.name == "bluegene_params") {
+ document.config.select_type[2].click() // bluegene
+ } else if (form.name == "cray_params") {
+ document.config.select_type[3].click() // cray
+ }
+}
+
+// When generating SelectTypeParameters return the parameters that
+// correspond to the checked select_type
+function get_select_type_params()
+{
+ if (document.config.select_type[0].checked) { // cons_res
+ return get_radio_field_skipfirst("SelectTypeParameters",
+ document.config.cons_res_params)
+ } else if (document.config.select_type[1].checked) { // linear
+ return "#SelectTypeParameters="
+ } else if (document.config.select_type[2].checked) { // bluegene
+ return "#SelectTypeParameters="
+ } else if (document.config.select_type[3].checked) { // cray
+ return "#SelectTypeParameters="
+ }
+}
+
+// When generating TaskPluginParam return the parameters that
+// correspond to the checked task_plugin
+function get_task_plugin_param()
+{
+ for (var i=0; i<document.config.task_plugin.length; i++) {
+ if (document.config.task_plugin[i].checked) {
+ if (document.config.task_plugin[i].value == "affinity") {
+ return "TaskPluginParam=" +
+ get_radio_value(document.config.task_plugin_param)
+ }
+ }
+ }
+ return "#TaskPluginParam="
+}
+
+function hide_box()
+{
+ var popup = document.getElementById('out_box');
+ popup.style.visibility = 'hidden';
+
+}
+
+function displayfile()
+{
+ var printme = "# slurm.conf file generated by configurator easy.html.<br>" +
+ "# Put this file on all nodes of your cluster.<br>" +
+ "# See the slurm.conf man page for more information.<br>" +
+ "#<br>" +
+ "ControlMachine=" + document.config.control_machine.value + "<br>" +
+ get_field("ControlAddr",document.config.control_addr) + "<br>" +
+ "# <br>" +
+ "#MailProg=/bin/mail <br>" +
+ "MpiDefault=" + get_radio_value(document.config.mpi_default) + "<br>" +
+ "#MpiParams=ports=#-# <br>" +
+ "ProctrackType=proctrack/" + get_radio_value(document.config.proctrack_type) + "<br>" +
+ "ReturnToService=" + get_radio_value(document.config.return_to_service) + "<br>" +
+ "SlurmctldPidFile=" + document.config.slurmctld_pid_file.value + "<br>" +
+ "#SlurmctldPort=6817 <br>" +
+ "SlurmdPidFile=" + document.config.slurmd_pid_file.value + "<br>" +
+ "#SlurmdPort=6818 <br>" +
+ "SlurmdSpoolDir=" + document.config.slurmd_spool_dir.value + "<br>" +
+ "SlurmUser=" + document.config.slurm_user.value + "<br>" +
+ "#SlurmdUser=root <br>" +
+ "StateSaveLocation=" + document.config.state_save_location.value + "<br>" +
+ "SwitchType=switch/" + get_radio_value(document.config.switch_type) + "<br>" +
+ "TaskPlugin=task/" + get_radio_value(document.config.task_plugin) + "<br>" +
+ "# <br>" +
+ "# <br>" +
+ "# TIMERS <br>" +
+ "#KillWait=30 <br>" +
+ "#MinJobAge=300 <br>" +
+ "#SlurmctldTimeout=120 <br>" +
+ "#SlurmdTimeout=300 <br>" +
+ "# <br>" +
+ "# <br>" +
+ "# SCHEDULING <br>" +
+ "FastSchedule=" + get_radio_value(document.config.fast_schedule) + "<br>" +
+ "SchedulerType=sched/" + get_radio_value(document.config.sched_type) + "<br>" +
+ "#SchedulerPort=7321 <br>" +
+ "SelectType=select/" + get_radio_value(document.config.select_type) + "<br>" +
+ "# <br>" +
+ "# <br>" +
+ "# LOGGING AND ACCOUNTING <br>" +
+ "AccountingStorageType=accounting_storage/" + get_radio_value(document.config.accounting_storage_type) + "<br>" +
+ get_field("ClusterName",document.config.cluster_name) + "<br>" +
+ "#JobAcctGatherFrequency=30 <br>" +
+ "JobAcctGatherType=jobacct_gather/" + get_radio_value(document.config.job_acct_gather_type) + "<br>" +
+ "#SlurmctldDebug=3 <br>" +
+ get_field("SlurmctldLogFile",document.config.slurmctld_logfile) + "<br>" +
+ "#SlurmdDebug=3 <br>" +
+ get_field("SlurmdLogFile",document.config.slurmd_logfile) + "<br>" +
+ "# <br>" +
+ "# <br>" +
+ "# COMPUTE NODES <br>" +
+ "NodeName=" + document.config.node_name.value +
+ get_field2(" NodeAddr",document.config.node_addr) +
+ get_field2(" CPUs",document.config.procs) +
+ get_field2(" RealMemory",document.config.memory) +
+ get_field2(" Sockets",document.config.sockets) +
+ get_field2(" CoresPerSocket",document.config.cores_per_socket) +
+ get_field2(" ThreadsPerCore",document.config.threads_per_core) +
+ " State=UNKNOWN <br>" +
+ "PartitionName=" + document.config.partition_name.value +
+ " Nodes=" + document.config.node_name.value +
+ " Default=YES"+
+ " MaxTime=" + document.config.max_time.value +
+ " State=UP"
+
+ //scroll(0,0);
+ //var popup = document.getElementById('out_box');
+
+ //popup.innerHTML = "<a href='javascript:hide_box();'>close</a><br>";
+ //popup.innerHTML += "#BEGIN SLURM.CONF FILE<br><br>";
+ //popup.innerHTML += printme;
+ //popup.innerHTML += "<br><br>#END SLURM.CONF FILE<br>";
+ //popup.innerHTML += "<a href='javascript:hide_box();'>close</a>";
+
+ //popup.style.visibility = 'visible';
+
+ // OLD CODE
+ document.open();
+ document.write(printme);
+ document.close();
+}
+
+-->
+</SCRIPT>
+<!-- <div style='visibility:hidden;text-align:left;background:#ccc;border:1px solid black;position: absolute;left:100;z-index:1;padding:5;' id='out_box'></div> -->
+</HEAD>
+<BODY>
+<FORM name=config>
+<H1>SLURM Version @SLURM_MAJOR@.@SLURM_MINOR@ Configuration Tool</H1>
+<P>This form can be used to create a SLURM configuration file with
+you controlling many of the important configuration parameters.</P>
+
+<p>This is a simplified version of the SLURM configuration tool. This version
+has fewer options for creating a SLURM configuration file. The full version
+of the SLURM configuration tool is available at
+<a href="configurator.html">configurator.html</a>.</p>
+
+<P><B>This tool supports SLURM version @SLURM_MAJOR@.@SLURM_MINOR@ only.</B>
+Configuration files for other versions of SLURM should be built
+using the tool distributed with it in <i>doc/html/configurator.html</i>.
+Some parameters will be set to default values, but you can
+manually edit the resulting <I>slurm.conf</I> as desired
+for greater flexibility. See <I>man slurm.conf</I> for more
+details about the configuration parameters.</P>
+
+<P>Note the while SLURM daemons create log files and other files as needed,
+it treats the lack of parent directories as a fatal error.
+This prevents the daemons from running if critical file systems are
+not mounted and will minimize the risk of cold-starting (starting
+without preserving jobs).</P>
+
+<P>Note that this configuration file must be installed on all nodes
+in your cluster.</P>
+
+<P>After you have filled in the fields of interest, use the
+"Submit" button on the bottom of the page to build the <I>slurm.conf</I>
+file. It will appear on your web browser. Save the file in text format
+as <I>slurm.conf</I> for use by SLURM.
+
+<P>For more information about SLURM, see
+<A HREF="http://www.schedmd.com/slurmdocs/slurm.html">http://www.schedmd.com/slurmdocs/slurm.html</A>
+
+<H2>Control Machines</H2>
+Define the hostname of the computer on which the SLURM controller and
+optional backup controller will execute. You can also specify addresses
+of these computers if desired (defaults to their hostnames).
+The IP addresses can be either numeric IP addresses or names.
+Hostname values should should not be the fully qualified domain
+name (e.g. use <I>tux</I> rather than <I>tux.abc.com</I>).
+<P>
+<input type="text" name="control_machine" value="linux0"> <B>ControlMachine</B>:
+Master Controller Hostname
+<P>
+<input type="text" name="control_addr"> <B>ControlAddr</B>: Master Controller
+Address (optional)
+<P>
+
+<H2>Compute Machines</H2>
+Define the machines on which user applications can run.
+You can also specify addresses of these computers if desired
+(defaults to their hostnames).
+Only a few of the possible parameters associated with the nodes will
+be set by this tool, but many others are available.
+Executing the command <i>slurmd -C</i> on each compute node will print its
+physical configuration (sockets, cores, real memory size, etc.), which
+can be used in constructing the <i>slurm.conf</i> file.
+All of the nodes will be placed into a single partition (or queue)
+with global access. Many options are available to group nodes into
+partitions with a wide variety of configuration parameters.
+Manually edit the <i>slurm.conf</i> produced to exercise these options.
+Node names and addresses may be specified using a numeric range specification.
+
+<P>
+<input type="text" name="node_name" value="linux[1-32]"> <B>NodeName</B>:
+Compute nodes
+<P>
+<input type="text" name="node_addr"> <B>NodeAddr</B>: Compute node addresses
+(optional)
+<P>
+<input type="text" name="partition_name" value="debug"> <B>PartitionName</B>:
+Name of the one partition to be created
+<P>
+<input type="text" name="max_time" value="INFINITE"> <B>MaxTime</B>:
+Maximum time limit of jobs in minutes or INFINITE
+<P>
+The following parameters describe a node's configuration.
+Set a value for <B>CPUs</B>.
+The other parameters are optional, but provide more control over scheduled resources:
+<P>
+<input type="text" name="procs" value="1"> <B>CPUs</B>: Count of processors
+on each compute node.
+If CPUs is omitted, it will be inferred from:
+Sockets, CoresPerSocket, and ThreadsPerCore.
+<P>
+<input type="text" name="sockets" value="">
+<B>Sockets</B>:
+Number of physical processor sockets/chips on the node.
+If Sockets is omitted, it will be inferred from:
+CPUs, CoresPerSocket, and ThreadsPerCore.
+<P>
+<input type="text" name="cores_per_socket" value="">
+<B>CoresPerSocket</B>:
+Number of cores in a single physical processor socket.
+The CoresPerSocket value describes physical cores, not
+the logical number of processors per socket.
+<P>
+<input type="text" name="threads_per_core" value="">
+<B>ThreadsPerCore</B>:
+Number of logical threads in a single physical core.
+<P>
+<input type="text" name="memory" value=""> <B>RealMemory</B>: Amount
+of real memory. This parameter is required when specifying Memory as a
+consumable resource with the select/cons_res plug-in. See below
+under Resource Selection.
+<P>
+
+<H2>SLURM User</H2>
+The SLURM controller (slurmctld) can run without elevated privileges,
+so it is recommended that a user "slurm" be created for it. For testing
+purposes any user name can be used.
+<P>
+<input type="text" name="slurm_user" value="slurm"> <B>SlurmUser</B>
+<P>
+
+<H2>State Preservation</H2>
+Define the location of a directory where the slurmctld daemon saves its state.
+This should be a fully qualified pathname which can be read and written to
+by the SLURM user on both the control machine and backup controller (if configured).
+The location of a directory where slurmd saves state should also be defined.
+This must be a unique directory on each compute server (local disk).
+The use of a highly reliable file system (e.g. RAID) is recommended.
+<P>
+<input type="text" name="state_save_location" value="/var/spool"> <B>StateSaveLocation</B>:
+Slurmctld state save directory </B>
+<P>
+<input type="text" name="slurmd_spool_dir" value="/var/spool/slurmd"> <B>SlurmdSpoolDir</B>:
+Slurmd state save directory
+<P>
+Define when a non-responding (DOWN) node is returned to service.<BR>
+Select one value for <B>ReturnToService</B>:<BR>
+<input type="radio" name="return_to_service" value="0">
+<B>0</B>: When explicitly restored to service by an administrator.<BR>
+<input type="radio" name="return_to_service" value="1" checked>
+<B>1</B>: Automatically, when slurmd daemon registers with valid configuration<BR>
+<P>
+
+<H2>Scheduling</H2>
+Define the mechanism to be used for controlling job ordering.<BR>
+Select one value for <B>SchedulerType</B>:<BR>
+<input type="radio" name="sched_type" value="builtin"> <B>Builtin</B>: First-In
+First-Out (FIFO)<BR>
+<input type="radio" name="sched_type" value="backfill" checked> <B>Backfill</B>:
+FIFO with backfill<BR>
+<P>
+Define what node configuration (sockets, cores, memory, etc.) should be used.
+Using values defined in the configuration file will provide faster scheduling.<BR>
+Select one value for <B>FastSchedule</B>:<BR>
+<input type="radio" name="fast_schedule" value="1" checked>
+<B>1</B>: Use node configuration values defined in configuration file<BR>
+<input type="radio" name="fast_schedule" value="0">
+<B>0</B>: Use node configuration values actually found on each node
+(if configured with with gang scheduling or allocation of individual
+processors to jobs rather than only whole node allocations, the processor
+count on the node should match the configured value to avoid having extra
+processors left idle)
+<P>
+
+<H2>Interconnect</H2>
+Define the node interconnect used.<BR>
+Select one value for <B>SwitchType</B>:<BR>
+<input type="radio" name="switch_type" value="elan"> <B>Elan</B>: Quadrics Elan3 or Elan4<BR>
+<input type="radio" name="switch_type" value="federation"> <B>Federation</B>: IBM
+Federation Switch<BR>
+<input type="radio" name="switch_type" value="none" checked> <B>None</B>: No special
+handling required (InfiniBand, Myrinet, Ethernet, etc.)<BR>
+<P>
+
+<H2>Default MPI Type</H2>
+Specify the type of MPI to be used by default. SLURM will configure environment
+variables accordingly. Users can over-ride this specification with an srun option.<BR>
+Select one value for <B>MpiDefault</B>:<BR>
+<input type="radio" name="mpi_default" value="mpichgm"> <B>MPICH-GM</B><BR>
+<input type="radio" name="mpi_default" value="mpichmx"> <B>MPICH-MX</B><BR>
+<input type="radio" name="mpi_default" value="mpich1_p4"> <B>MPICH1-P4</B><BR>
+<input type="radio" name="mpi_default" value="mpich1_shmem"> <B>MPICH1-SHMEM</B>:
+This also works for MVAPICH-SHMEM.<BR>
+<input type="radio" name="mpi_default" value="pmi2"> <B>MPI-PMI2</B>
+(For MPI2 and MVAPICH2)<BR>
+<input type="radio" name="mpi_default" value="mvapich"> <B>MVAPICH</B><BR>
+<input type="radio" name="mpi_default" value="none" checked> <B>None</B>:
++This works for most other MPI types including MPICH2, LAM MPI and Open MPI.<BR>
+<P>
+
+<H2>Process Tracking</H2>
+Define the algorithm used to identify which processes are associated with a
+given job. This is used to signal, kill, and account for the processes associated
+with a job step.<BR>
+Select one value for <B>ProctrackType</B>:<BR>
+<input type="radio" name="proctrack_type" value="aix"> <B>AIX</B>: Use AIX kernel
+extension, recommended for AIX systems<BR>
+<input type="radio" name="proctrack_type" value="cgroup"> <B>Cgroup</B>: Use
+Linux <i>cgroup</i> to create a job container and track processes.
+Build a <i>cgroup.conf</i> file as well<BR>
+<input type="radio" name="proctrack_type" value="pgid" checked> <B>Pgid</B>: Use Unix
+Process Group ID, processes changing their process group ID can escape from SLURM
+control<BR>
+<input type="radio" name="proctrack_type" value="linuxproc"> <B>LinuxProc</B>: Use
+parent process ID records, required for MPICH-GM use, processes can escape
+from SLURM control<BR>
+<input type="radio" name="proctrack_type" value="rms"> <B>RMS</B>: Use Quadrics
+kernel infrastructure, recommended for systems where this is available<BR>
+<input type="radio" name="proctrack_type" value="sgi_job"> <B>SGI's PAGG
+module</B>: Use <A HREF="http://oss.sgi.com/projects/pagg/">SGI's Process
+Aggregates (PAGG) kernel module</A>, recommended where available<BR>
+<P>
+
+<H2>Resource Selection</H2>
+Define resource (node) selection algorithm to be used.<BR>
+Select one value for <B>SelectType</B>:<BR>
+<input type="radio" name="select_type" value="cons_res">
+<B>Cons_res</B>: Allocate individual processors and memory<BR>
+<DL>
+<DL>
+<DT><B>SelectTypeParameters</B> (As used by <I>SelectType=Cons_res</I> only):
+ <DD> Note: The -E extension for sockets, cores, and threads
+ are ignored within the node allocation mechanism
+ when CR_CPU or CR_CPU_MEMORY is selected.
+ They are considered to compute the total number of
+ tasks when -n is not specified
+ <DD> Note: CR_MEMORY assumes MaxShare value of one or higher
+<DT> <input type="radio" name="cons_res_params" value="CR_CPU" checked
+ onClick="javascript:set_select_type(this, 'cons_res')">
+ <B>CR_CPU</B>: (default)
+ CPUs as consumable resources.
+ <DD> No notion of sockets, cores, or threads.
+ On a multi-core system, cores will be considered CPUs.
+ On a multi-core/hyperthread system, threads will be considered CPUs.
+ On a single-core systems CPUs are CPUs. ;-)
+<DT> <input type="radio" name="cons_res_params" value="CR_Socket"
+ onClick="javascript:set_select_type(this)">
+ <B>CR_Socket</B>: Sockets as a consumable resource.
+<DT> <input type="radio" name="cons_res_params" value="CR_Core"
+ onClick="javascript:set_select_type(this)">
+ <B>CR_Core</B>: Cores as a consumable resource.
+<DT> <input type="radio" name="cons_res_params" value="CR_Memory"
+ onClick="javascript:set_select_type(this)">
+ <B>CR_Memory</B>: Memory as a consumable resource.
+ <DD> Note: CR_Memory assumes MaxShare value of one or higher
+<DT> <input type="radio" name="cons_res_params" value="CR_CPU_Memory"
+ onClick="javascript:set_select_type(this)">
+ <B>CR_CPU_Memory</B>:
+ CPU and Memory as consumable resources.
+<DT> <input type="radio" name="cons_res_params" value="CR_Socket_Memory"
+ onClick="javascript:set_select_type(this)">
+ <B>CR_Socket_Memory</B>:
+ Socket and Memory as consumable resources.
+<DT> <input type="radio" name="cons_res_params" value="CR_Core_Memory"
+ onClick="javascript:set_select_type(this)">
+ <B>CR_Core_Memory</B>:
+ Core and Memory as consumable resources.
+</DL>
+</DL>
+<input type="radio" name="select_type" value="linear" checked>
+<B>Linear</B>: Node-based
+resource allocation, does not manage individual processor allocation<BR>
+<input type="radio" name="select_type" value="bluegene">
+<B>BlueGene</B>: For IBM Blue Gene systems only<BR>
+<input type="radio" name="select_type" value="cray">
+<B>Cray</B>: Cray systems running ALPS only<BR>
+<P>
+
+<H2>Task Launch</H2>
+Define a task launch plugin. This may be used to
+provide resource management within a node (e.g. pinning
+tasks to specific processors).
+Select one value for <B>TaskPlugin</B>:<BR>
+<input type="radio" name="task_plugin" value="none" checked> <B>None</B>: No task launch actions<BR>
+<input type="radio" name="task_plugin" value="affinity"> <B>Affinity</B>:
+CPU affinity support
+(see srun man pages for the --cpu_bind, --mem_bind, and -E options)<BR>
+<input type="radio" name="task_plugin" value="cgroup"> <B>Cgroup</B>:
+Allocated resources constraints enforcement using Linux Control Groups
+(see cgroup.conf man page)
+</DL></DL>
+<P>
+
+<H2>Event Logging</H2>
+Slurmctld and slurmd daemons can each be configured with different
+levels of logging verbosity from 0 (quiet) to 7 (extremely verbose).
+Each may also be configured to use debug files. Use fully qualified
+pathnames for the files.
+<P>
+<input type="text" name="slurmctld_logfile" value=""> <B>SlurmctldLogFile</B> (default is none, log goes to syslog)
+<P>
+<input type="text" name="slurmd_logfile" value=""> <B>SlurmdLogFile</B> (default is none,
+log goes to syslog, string "%h" in name gets replaced with hostname)
+<P>
+
+<H2>Job Accounting Gather</H2>
+SLURM accounts for resource use per job. System specifics can be polled
+as determined by system type<BR>
+Select one value for <B>JobAcctGatherType</B>:<BR>
+<input type="radio" name="job_acct_gather_type" value="none" checked> <B>None</B>: No
+job accounting<BR>
+<input type="radio" name="job_acct_gather_type" value="aix"> <B>AIX</B>: Specific
+AIX process table information gathered, use with AIX systems only<BR>
+<input type="radio" name="job_acct_gather_type" value="linux"> <B>Linux</B>: Specific
+Linux process table information gathered, use with Linux systems only<BR>
+
+<H2>Job Accounting Storage</H2>
+Used with the Job Accounting Gather SLURM can store the accounting information in many different fashions. Fill in your systems choice here<BR>
+Select one value for <B>AccountingStorageType</B>:<BR>
+<input type="radio" name="accounting_storage_type" value="none" checked> <B>None</B>:
+No job accounting storage<BR>
+<input type="radio" name="accounting_storage_type" value="filetxt"> <B>FileTxt</B>:
+Write job accounting to a text file (records limited information)<BR>
+<input type="radio" name="accounting_storage_type" value="mysql"> <B>MySQL</B>:
+Write job accounting to a MySQL database<BR>
+<input type="radio" name="accounting_storage_type" value="pgsql"> <B>PGSQL</B>:
+Write job accounting to a PostgreSQL database (not fully supported)<BR>
+<input type="radio" name="accounting_storage_type" value="slurmdbd"> <B>SlurmDBD</B>:
+Write job accounting to Slurm DBD (database daemon) which can securely
+save the data from many Slurm managed clusters into a common database<BR>
+<p><b>Options below are for use with a database to specify where the database is running and how to connect to it</b><br>
+<input type="text" name="cluster_name" value="cluster"> <B>ClusterName</B>:
+Name to be recorded in database for jobs from this cluster.
+This is important if a single database is used to record information
+from multiple Slurm-managed clusters.<br>
+
+<P>
+
+<H2>Process ID Logging</H2>
+Define the location into which we can record the daemon's process ID.
+This is used to locate the appropriate daemon for signalling.
+Specify the fully qualified pathname for each file.
+<P>
+<input type="text" name="slurmctld_pid_file" value="/var/run/slurmctld.pid">
+<B>SlurmctldPidFile</B>
+<P>
+<input type="text" name="slurmd_pid_file" value="/var/run/slurmd.pid">
+<B>SlurmdPidFile</B>
+<P>
+
+<BR>
+<BR>
+<input type=button value="Submit" onClick="javascript:displayfile()">
+<input type=reset value="Reset Form">
+<P>
+</FORM>
+<HR>
+<a href="disclaimer.html" target="_blank" class="privacy">Legal Notices</a><br>
+Last modified 3 April 2012</P>
+</BODY>
diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in
index a308af6..12b0543 100644
--- a/doc/html/configurator.html.in
+++ b/doc/html/configurator.html.in
@@ -72,6 +72,8 @@
document.config.select_type[1].click() // linear
} else if (form.name == "bluegene_params") {
document.config.select_type[2].click() // bluegene
+ } else if (form.name == "cray_params") {
+ document.config.select_type[3].click() // cray
}
}
@@ -86,6 +88,8 @@
return "#SelectTypeParameters="
} else if (document.config.select_type[2].checked) { // bluegene
return "#SelectTypeParameters="
+ } else if (document.config.select_type[3].checked) { // cray
+ return "#SelectTypeParameters="
}
}
@@ -158,6 +162,7 @@
"#PropagatePrioProcess=0 <br>" +
"#PropagateResourceLimits= <br>" +
"#PropagateResourceLimitsExcept= <br>" +
+ "#RebootProgram= <br>" +
"ReturnToService=" + get_radio_value(document.config.return_to_service) + "<br>" +
"#SallocDefaultCommand= <br>" +
"SlurmctldPidFile=" + document.config.slurmctld_pid_file.value + "<br>" +
@@ -310,6 +315,11 @@
<P>This form can be used to create a SLURM configuration file with
you controlling many of the important configuration parameters.</P>
+<p>This is the full version of the SLURM configuration tool. This version
+has all the configuration options to create a SLURM configuration file. There
+is a simplified version of the SLURM configuration tool available at
+<a href="configurator.easy.html">configurator.easy.html</a>.</p>
+
<P><B>This tool supports SLURM version @SLURM_MAJOR@.@SLURM_MINOR@ only.</B>
Configuration files for other versions of SLURM should be built
using the tool distributed with it in <i>doc/html/configurator.html</i>.
@@ -490,10 +500,10 @@
This must be a unique directory on each compute server (local disk).
The use of a highly reliable file system (e.g. RAID) is recommended.
<P>
-<input type="text" name="state_save_location" value="/tmp"> <B>StateSaveLocation</B>:
+<input type="text" name="state_save_location" value="/var/spool"> <B>StateSaveLocation</B>:
Slurmctld state save directory <B>Must be writable by both ControlMachine and BackupController</B>
<P>
-<input type="text" name="slurmd_spool_dir" value="/tmp/slurmd"> <B>SlurmdSpoolDir</B>:
+<input type="text" name="slurmd_spool_dir" value="/var/spool/slurmd"> <B>SlurmdSpoolDir</B>:
Slurmd state save directory
<P>
Define when a non-responding (DOWN) node is returned to service.<BR>
@@ -553,6 +563,8 @@
<input type="radio" name="mpi_default" value="mpich1_p4"> <B>MPICH1-P4</B><BR>
<input type="radio" name="mpi_default" value="mpich1_shmem"> <B>MPICH1-SHMEM</B>:
This also works for MVAPICH-SHMEM.<BR>
+<input type="radio" name="mpi_default" value="pmi2"> <B>MPI-PMI2</B>
+(For MPI2 and MVAPICH2)<BR>
<input type="radio" name="mpi_default" value="mvapich"> <B>MVAPICH</B><BR>
<input type="radio" name="mpi_default" value="none" checked> <B>None</B>:
+This works for most other MPI types including MPICH2, LAM MPI and Open MPI.<BR>
@@ -632,6 +644,8 @@
resource allocation, does not manage individual processor allocation<BR>
<input type="radio" name="select_type" value="bluegene">
<B>BlueGene</B>: For IBM Blue Gene systems only<BR>
+<input type="radio" name="select_type" value="cray">
+<B>Cray</B>: Cray systems running ALPS only<BR>
<P>
<H2>Task Launch</H2>
@@ -756,10 +770,7 @@
<input type="radio" name="accounting_storage_type" value="none" checked> <B>None</B>:
No job accounting storage<BR>
<input type="radio" name="accounting_storage_type" value="filetxt"> <B>FileTxt</B>:
-Write job accounting to a text file<BR>
-<input type="radio" name="accounting_storage_type" value="gold"> <B>Gold</B>:
-Write completion status to Gold database daemon which can securely
-save the data from many Slurm managed clusters into a common database<BR>
+Write job accounting to a text file (records limited information)<BR>
<input type="radio" name="accounting_storage_type" value="mysql"> <B>MySQL</B>:
Write job accounting to a MySQL database<BR>
<input type="radio" name="accounting_storage_type" value="pgsql"> <B>PGSQL</B>:
@@ -845,5 +856,5 @@
</FORM>
<HR>
<a href="disclaimer.html" target="_blank" class="privacy">Legal Notices</a><br>
-Last modified 8 August 2011</P>
+Last modified 3 April 2012</P>
</BODY>
diff --git a/doc/html/cons_res.shtml b/doc/html/cons_res.shtml
index b253daf..29c1bcb 100644
--- a/doc/html/cons_res.shtml
+++ b/doc/html/cons_res.shtml
@@ -152,7 +152,7 @@
time as well as request from users to help us prioritize the features.
Please send comments and requests about the consumable resources to
-<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.
+<a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a>.
<ol start=1 type=1>
<li><b>Issue with --max_nodes, --max_sockets_per_node, --max_cores_per_socket and --max_threads_per_core</b></li>
@@ -446,6 +446,6 @@
<p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 8 July 2008</p>
+<p style="text-align:center;">Last modified 3 February 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/cray.shtml b/doc/html/cray.shtml
index 8b6f7f8..10a4e8a 100644
--- a/doc/html/cray.shtml
+++ b/doc/html/cray.shtml
@@ -1,16 +1,24 @@
<!--#include virtual="header.txt"-->
-<h1>SLURM User and Administrator Guide for Cray systems</h1>
+<h1>SLURM User and Administrator Guide for Cray Systems</h1>
-<h2>User Guide</h2>
+<ul>
+<li><a href="#user_guide">User Guide</a></li>
+<li><a href="#admin_guide">Administrator Guide</a></li>
+<li><a href="http://www.cray.com">Cray</a></li>
+</ul>
+
+<HR SIZE=4>
+
+<h2><a name="user_guide">User Guide</a></h2>
<p>This document describes the unique features of SLURM on Cray computers.
You should be familiar with the SLURM's mode of operation on Linux clusters
before studying the differences in Cray system operation described in this
document.</p>
-<p>SLURM version 2.3 is designed to operate as a job scheduler over Cray's
-Application Level Placement Scheduler (ALPS).
+<p>Since version 2.3, SLURM is designed to operate as a job scheduler over
+Cray's Application Level Placement Scheduler (ALPS).
Use SLURM's <i>sbatch</i> or <i>salloc</i> commands to create a resource
allocation in ALPS.
Then use ALPS' <i>aprun</i> command to launch parallel jobs within the resource
@@ -32,7 +40,8 @@
and the job steps not being visible, all other SLURM commands will operate
as expected. Note that in order to build and install the aprun wrapper
described above, execute "configure" with the <i>--with-srun2aprun</i>
-option or add <i>%_with_srun2aprun 1</i> to your <i>~/.rpmmacros</i> file.</p>
+option or add <i>%_with_srun2aprun 1</i> to your <i>~/.rpmmacros</i>
+file. This option is set with RPMs from Cray.</p>
<h3>Node naming and node geometry on Cray XT/XE systems</h3>
<p>SLURM node names will be of the form "nid#####" where "#####" is a five-digit sequence number.
@@ -99,7 +108,7 @@
Setting <i>--ntasks-per-node</i> to the number of cores per node yields the default per-CPU share
minimum value.</p>
-<p>For all cases in between these extremes, set --mem=per_task_memory and</p>
+<p>For all cases in between these extremes, set --mem=per_task_node or --mem-per-cpu=memory_per_cpu (node CPU count and task count may differ) and</p>
<pre>
--ntasks-per-node=floor(node_memory / per_task_memory)
</pre>
@@ -111,7 +120,7 @@
#SBATCH --comment="requesting 7500MB per task on 32000MB/24-core nodes"
#SBATCH --ntasks=64
#SBATCH --ntasks-per-node=4
- #SBATCH --mem=7500
+ #SBATCH --mem=30000
</pre>
<p>If you would like to fine-tune the memory limit of your application, you can set the same parameters in
a salloc session and then check directly, using</p>
@@ -128,7 +137,7 @@
on CLE 3.x systems for details.</p>
<h3>Node ordering options</h3>
-<p>SLURM honours the node ordering policy set for Cray's Application Level Placement Scheduler (ALPS). Node
+<p>SLURM honors the node ordering policy set for Cray's Application Level Placement Scheduler (ALPS). Node
ordering is a configurable system option (ALPS_NIDORDER in /etc/sysconfig/alps). The current
setting is reported by '<i>apstat -svv</i>' (look for the line starting with "nid ordering option") and
can not be changed at runtime. The resulting, effective node ordering is revealed by '<i>apstat -no</i>'
@@ -145,16 +154,38 @@
<p>Cray/ALPS node ordering is a topic of ongoing work, some information can be found in the CUG-2010 paper
"<i>ALPS, Topology, and Performance</i>" by Carl Albing and Mark Baker.</p>
-<h2>Administrator Guide</h2>
+<h3>GPU Use</h3>
+
+<p>Users may specify GPU memory required per node using the <i>--gres=gpu_mem:#</i>
+option to any of the commands used to create a job allocation/reservation.</p>
+
+<h3>Front-End Node Use</h3>
+
+<p>If you want to be allocated resources on a front-end node and no compute
+nodes (typically used for pre- or post-processing functionality) then submit a
+batch job with a node count specification of zero.</p>
+<pre>
+sbatch -N0 pre_process.bash
+</pre>
+<p><b>Note</b>: Support for Cray job allocations with zero compute nodes was
+added to SLURM version 2.4. Earlier versions of SLURM will return an error for
+zero compute node job requests.</p>
+<p><b>Note</b>: Job allocations with zero compute nodes can only be made in
+SLURM partitions explicitly configured with <b>MinNodes=0</b> (the default
+minimum node count for a partition is one compute node).</p>
+
+<HR SIZE=4>
+
+<h2><a name="admin_guide">Administrator Guide</a></h2>
<h3>Install supporting RPMs</h3>
<p>The build requires a few -devel RPMs listed below. You can obtain these from
SuSe/Novell.
<ul>
-<li>CLE 2.x uses SuSe SLES 10 packages (rpms may be on the normal isos)</li>
-<li>CLE 3.x uses Suse SLES 11 packages (rpms are on the SDK isos, there
-are two SDK iso files for SDK)</li>
+<li>CLE 2.x uses SuSe SLES 10 packages (RPMs may be on the normal ISOs)</li>
+<li>CLE 3.x uses Suse SLES 11 packages (RPMs are on the SDK ISOs, there
+are two SDK ISO files for SDK)</li>
</ul></p>
<p>You can check by logging onto the boot node and running</p>
@@ -167,7 +198,7 @@
<ul>
<li>expat-2.0.xxx</li>
<li>libexpat-devel-2.0.xxx</li>
-<li>cray-MySQL-devel-enterprise-5.0.64 (this should be on the Cray iso)</li>
+<li>cray-MySQL-devel-enterprise-5.0.64 (this should be on the Cray ISO)</li>
</ul>
<p>For example, loading MySQL can be done like this:</p>
@@ -189,61 +220,33 @@
<p>All Cray-specific PrgEnv and compiler modules should be removed and root
privileges will be required to install these files.</p>
-<h3>Create a build root</h3>
-
-<p>The build is done on a normal service node, where you like
-(e.g. <i>/ufs/slurm/build</i> would work).
-Most scripts check for the environment variable LIBROOT.
-You can either edit the scripts or export this variable. Easiest way:</p>
-
-<pre>
-login: # export LIBROOT=/ufs/slurm/build
-login: # mkdir -vp $LIBROOT
-login: # cd $LIBROOT
-</pre>
-
-<h3>Install SLURM modulefile</h3>
-
-<p>This file is distributed as part the SLURM tar-ball in
-<i>contribs/cray/opt_modulefiles_slurm</i>. Install it as
-<i>/opt/modulefiles/slurm</i> (or anywhere else in your module path).
-It means that you can use Munge as soon as it is built.</p>
-<pre>
-login: # scp ~/slurm/contribs/cray/opt_modulefiles_slurm root@boot:/rr/current/software/
-</pre>
-
-<h3>Build and install Munge</h3>
+<h3>Install Munge</h3>
<p>Note the Munge installation process on Cray systems differs
-somewhat from that described in the
+somewhat from that described in the
<a href="http://code.google.com/p/munge/wiki/InstallationGuide">
MUNGE Installation Guide</a>.</p>
-<p>Munge is the authentication daemon and needed by SLURM. Download
-munge-0.5.10.tar.bz2 or newer from
-<a href="http://code.google.com/p/munge/downloads/list">
-http://code.google.com/p/munge/downloads/list</a>. This is how one
-can build on a login node and install it.</p>
+<p>Munge is the authentication daemon and needed by SLURM. You can get
+Munge RPMs from Cray. Use the below method to install and test it. The
+Cray Munge RPM installs Munge in /opt/munge.</p>
+
+<p>If needed copy the RPMs over to the boot node</p>
<pre>
-login: # cd $LIBROOT
-login: # cp ~/slurm/contribs/cray/munge_build_script.sh $LIBROOT
-login: # mkdir -p ${LIBROOT}/munge/zip
-login: # curl -O http://munge.googlecode.com/files/munge-0.5.10.tar.bz2
-login: # cp munge-0.5.10.tar.bz2 ${LIBROOT}/munge/zip
-login: # chmod u+x ${LIBROOT}/munge/zip/munge_build_script.sh
-login: # ${LIBROOT}/munge/zip/munge_build_script.sh
-(generates lots of output and enerates a tar-ball called
-$LIBROOT/munge_build-.*YYYY-MM-DD.tar.gz)
-login: # scp munge_build-2011-07-12.tar.gz root@boot:/rr/current/software
+login: # scp munge-*.rpm root@boot:/rr/current/software
</pre>
-<p>Install the tar-ball by on the boot node and build an encryption
-key file executing:
+<p>Install the RPMs on the boot node. While this process creates a
+Munge key, it can't use the /etc/munge directory. So we make a
+/opt/munge/key directory instead and create a key there.</p>
<pre>
boot: # xtopview
-default: # tar -zxvf $LIBROOT/munge_build-*.tar.gz -C /rr/current /
-default: # dd if=/dev/urandom bs=1 count=1024 >/opt/slurm/munge/etc/munge.key
-default: # chmod go-rxw /opt/slurm/munge/etc/munge.key
+default: # rpm -ivh /software/munge-*.x86_64.rpm
+default: # mkdir /opt/munge/key
+default: # dd if=/dev/urandom bs=1 count=1024 >/opt/munge/key/munge.key
+default: # chmod go-rxw /opt/munge/key/munge.key
+default: # chown daemon /opt/munge/key/munge.key
+default: # perl -pi -e 's/#DAEMON_ARGS=/DAEMON_ARGS="--key-file \/opt\/munge\/key\/munge.key"/g' /etc/init.d/munge
default: # exit
</pre>
@@ -256,55 +259,37 @@
</ul></p>
<pre>
-login: # mkdir --mode=0711 -vp /var/lib/munge
-login: # mkdir --mode=0700 -vp /var/log/munge
-login: # mkdir --mode=0755 -vp /var/run/munge
-login: # module load slurm
-</pre>
-<pre>
sdb: # mkdir --mode=0711 -vp /var/lib/munge
sdb: # mkdir --mode=0700 -vp /var/log/munge
sdb: # mkdir --mode=0755 -vp /var/run/munge
+sdb: # chown daemon /var/lib/munge
+sdb: # chown daemon /var/log/munge
+sdb: # chown daemon /var/run/munge
+sdb: # /etc/init.d/munge start
+
</pre>
-<p>Start the munge daemon and test it.</p>
+<p>Start the Munge daemon and test it.</p>
<pre>
-login: # munged --key-file /opt/slurm/munge/etc/munge.key
+login: # export PATH=/opt/munge/bin:$PATH
login: # munge -n
MUNGE:AwQDAAAEy341MRViY+LacxYlz+mchKk5NUAGrYLqKRUvYkrR+MJzHTgzSm1JALqJcunWGDU6k3vpveoDFLD7fLctee5+OoQ4dCeqyK8slfAFvF9DT5pccPg=:
+login: # munge -n | unmunge
</pre>
-<p>When done, verify network connectivity by executing:
+<p>When done, verify network connectivity by executing the following (the
+Munged daemon must be started on the other-login-host as well):
<ul>
-<li><i>munge -n | ssh other-login-host /opt/slurm/munge/bin/unmunge</i></li>
+<li><i>munge -n | ssh other-login-host /opt/munge/bin/unmunge</i></li>
</ul>
-
-<p>If you decide to keep the installation, you may be interested in automating
-the process using an <i>init.d</i> script distributed with the Munge. This
-should be installed on all nodes running munge, e.g., 'xtopview -c login' and
-'xtopview -n sdbNodeID'
-</p>
-<pre>
-boot: # xtopview -c login
-login: # cp /software/etc_init_d_munge /etc/init.d/munge
-login: # chmod u+x /etc/init.d/munge
-login: # chkconfig munge on
-login: # exit
-boot: # xtopview -n 31
-node/31: # cp /software/etc_init_d_munge /etc/init.d/munge
-node/31: # chmod u+x /etc/init.d/munge
-node/31: # chkconfig munge on
-node/31: # exit
-</pre>
-
<h3>Enable the Cray job service</h3>
<p>This is a common dependency on Cray systems. ALPS relies on the Cray job service to
generate cluster-unique job container IDs (PAGG IDs). These identifiers are used by
ALPS to track running (aprun) job steps. The default (session IDs) is not unique
across multiple login nodes. This standard procedure is described in chapter 9 of
- <a href="http://docs.cray.com/books/S-2393-30/">S-2393</a> and takes only two
+ <a href="http://docs.cray.com/books/S-2393-4003/">S-2393</a> and takes only two
steps, both to be done on all 'login' class nodes (xtopview -c login):</p>
<ul>
<li>make sure that the /etc/init.d/job service is enabled (chkconfig) and started</li>
@@ -327,11 +312,13 @@
(uncomment the pam_job.so line as shown above)
</pre>
-<h3>Build and Configure SLURM</h3>
+<h3>Install and Configure SLURM</h3>
<p>SLURM can be built and installed as on any other computer as described
<a href="quickstart_admin.html">Quick Start Administrator Guide</a>.
-An example of building and installing SLURM version 2.3.0 is shown below.</p>
+You can also get current SLURM RPMs from Cray. An installation
+process for the RPMs is described below. The
+Cray SLURM RPMs install in /opt/slurm.</p>
<p><b>NOTE:</b> By default neither the <i>salloc</i> command or <i>srun</i>
command wrapper can be executed as a background process. This is done for two
@@ -346,11 +333,13 @@
</ol>
<p>You can optionally enable <i>salloc</i> and <i>srun</i> to execute as
background processes by using the configure option
-<i>"--enable-salloc-background"</i>, however doing will result in failed
+<i>"--enable-salloc-background"</i> (or the .rpmmacros option
+<i>"%_with_salloc_background 1"</i>), however doing so will result in failed
resource allocations
(<i>error: Failed to allocate resources: Requested reservation is in use</i>)
if not executed sequentially and
-increase the likelyhood of orphaned processes.</p>
+increase the likelihood of orphaned processes. Specifically request
+this version when requesting RPMs from Cray as this is not on by default.</p>
<!-- Example:
Modify srun script or ask user to execute "/usr/bin/setsid"
before salloc or srun command -->
@@ -358,27 +347,17 @@
salloc spawns zsh, zsh spawns bash, etc.
when salloc terminates, bash becomes a child of init -->
+<p>If needed copy the RPMs over to the boot node.</p>
<pre>
-login: # mkdir build && cd build
-login: # slurm/configure \
- --prefix=/opt/slurm/2.3.0 \
- --with-munge=/opt/slurm/munge/ \
- --with-mysql_config=/opt/cray/MySQL/5.0.64-1.0000.2899.20.2.gem/bin \
- --with-srun2aprun
-login: # make -j
-login: # mkdir install
-login: # make DESTDIR=/tmp/slurm/build/install install
-login: # make DESTDIR=/tmp/slurm/build/install install-contrib
-login: # cd install
-login: # tar czf slurm_opt.tar.gz opt
-login: # scp slurm_opt.tar.gz boot:/rr/current/software
+login: # scp slurm-*.rpm root@boot:/rr/current/software
</pre>
+<p>Install the RPMs on the boot node.</p>
<pre>
boot: # xtopview
-default: # tar xzf /software/slurm_opt.tar.gz -C /
-default: # cd /opt/slurm/
-default: # ln -s 2.3.0 default
+default: # rpm -ivh /software/slurm-*.x86_64.rpm
+<i>edit /etc/slurm/slurm.conf and /etc/slurm/cray.conf</i>
+default: # exit
</pre>
<p>When building SLURM's <i>slurm.conf</i> configuration file, use the
@@ -394,7 +373,7 @@
<i>smap</i> and <i>sview</i> commands.
<i>NodeHostName</i> will be set to the node's component label.
The format of the component label is "c#-#c#s#n#" where the "#" fields
-represent in order: cabinet, row, cate, blade or slot, and node.
+represent in order: cabinet, row, cage, blade or slot, and node.
For example "c0-1c2s5n3" is cabinet 0, row 1, cage 3, slot 5 and node 3.</p>
<p>The <i>slurmd</i> daemons will not execute on the compute nodes, but will
@@ -413,7 +392,7 @@
<p>You need to specify the appropriate resource selection plugin (the
<i>SelectType</i> option in SLURM's <i>slurm.conf</i> configuration file).
-Configure <i>SelectType</i> to <i>select/cray</i> The <i>select/cray</i>
+Configure <i>SelectType</i> to <i>select/cray</i>. The <i>select/cray</i>
plugin provides an interface to ALPS plus issues calls to the
<i>select/linear</i>, which selects resources for jobs using a best-fit
algorithm to allocate whole nodes to jobs (rather than individual sockets,
@@ -445,14 +424,17 @@
SchedulerType=sched/backfill
# Node selection: use the special-purpose "select/cray" plugin.
-# Internally this uses select/linar, i.e. nodes are always allocated
+# Internally this uses select/linear, i.e. nodes are always allocated
# in units of nodes (other allocation is currently not possible, since
# ALPS does not yet allow to run more than 1 executable on the same
# node, see aprun(1), section LIMITATIONS).
#
# Add CR_memory as parameter to support --mem/--mem-per-cpu.
+# GPU memory allocation supported as generic resource.
+# NOTE: No gres/gpu_mem plugin is required, only generic SLURM GRES logic.
SelectType=select/cray
SelectTypeParameters=CR_Memory
+GresTypes=gpu_mem
# Proctrack plugin: only/default option is proctrack/sgi_job
# ALPS requires cluster-unique job container IDs and thus the /etc/init.d/job
@@ -497,15 +479,17 @@
# here, since the Magny Cours hosts two NUMA nodes each, which is
# also visible in the ALPS inventory (4 Segments per node, each
# containing 6 'Processors'/Cores).
+# Also specify that 2 GB of GPU memory is available on every node
NodeName=DEFAULT Sockets=4 CoresPerSocket=6 ThreadsPerCore=1
NodeName=DEFAULT RealMemory=32000 State=UNKNOWN
+NodeName=DEFAULT Gres=gpu_mem:2g
# List the nodes of the compute partition below (service nodes are not
# allowed to appear)
NodeName=nid00[002-013,018-159,162-173,178-189]
# Frontend nodes: these should not be available to user logins, but
-# have all filesystems mounted that are also
+# have all filesystems mounted that are also
# available on a login node (/scratch, /home, ...).
FrontendName=palu[7-9]
@@ -596,7 +580,7 @@
<i>contribs/cray/etc_sysconfig_slurm</i> into <i>/etc/sysconfig/slurm</i>
for these limits to take effect. This script is executed from
<i>/etc/init.d/slurm</i>, which is typically executed to start the SLURM
-daemons. An excerpt of <i>contribs/cray/etc_sysconfig_slurm</i>is shown
+daemons. An excerpt of <i>contribs/cray/etc_sysconfig_slurm</i> is shown
below.</p>
<pre>
@@ -614,15 +598,6 @@
ulimit -d unlimited # max size of a process's data segment in KB
</pre>
-<p>SLURM's <i>init.d</i> script should also be installed to automatically
-start SLURM daemons when nodes boot as shown below. Be sure to edit the script
-as appropriate to reference the proper file location (modify the variable
-<i>PREFIX</i>).
-
-<pre>
-login: # scp /home/crayadm/ben/slurm/etc/init.d.slurm boot:/rr/current/software/
-</pre>
-
<p>SLURM will ignore any interactive jobs or nodes in interactive mode
so set all your nodes to batch from any service node. Dropping the
-n option will make all nodes batch.</p>
@@ -637,6 +612,7 @@
<pre>
sdb: # mkdir -p /ufs/slurm/log
sdb: # mkdir -p /ufs/slurm/spool
+sdb: # module load slurm
sdb: # /etc/init.d/slurm start
</pre>
@@ -666,6 +642,6 @@
<p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 13 March 2012</p></td>
+<p style="text-align:center;">Last modified 18 September 2012</p></td>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/documentation.shtml b/doc/html/documentation.shtml
index efe406c..116333d 100644
--- a/doc/html/documentation.shtml
+++ b/doc/html/documentation.shtml
@@ -9,7 +9,7 @@
<li><a href="quickstart.html">Quick Start User Guide</a></li>
<li><a href="man_index.html">Man Pages</a></li>
<li><a href="cpu_management.html">CPU Management User and Administrator Guide</a></li>
-<li><a href="mpi_guide.html">MPI Use Guide</a></li>
+<li><a href="mpi_guide.html">MPI and UPC Users Guide</a></li>
<li><a href="mc_support.html">Support for Multi-core/Multi-threaded Architectures</a></li>
<li><a href="multi_cluster.html">Multi-Cluster Operation</a></li>
<li><a href="checkpoint_blcr.html">SLURM Checkpoint/Restart with BLCR</a></li>
@@ -25,15 +25,18 @@
<h2>SLURM Administrators</h2>
<ul>
<li><a href="quickstart_admin.html">Quick Start Administrator Guide</a></li>
-<li><a href="cpu_management.html">CPU Management User and Administrator Guide</a></li>
-<li><a href="configurator.html">Configuration Tool</a></li>
-<li><a href="troubleshoot.html">Troubleshooting Guide</a></li>
-<li><a href="big_sys.html">Large Cluster Administration Guide</a></li>
<li><a href="accounting.html">Accounting</a></li>
+<li><a href="cgroups.html">Cgroups Guide</a></li>
+<li><a href="configurator.html">Configuration Tool (Full version)</a></li>
+<li><a href="configurator.easy.html">Configuration Tool (Simplified version)</a></li>
+<li><a href="cpu_management.html">CPU Management User and Administrator Guide</a></li>
+<li><a href="big_sys.html">Large Cluster Administration Guide</a></li>
<li><a href="power_save.html">Power Saving Guide</a></li>
+<li><a href="troubleshoot.html">Troubleshooting Guide</a></li>
<li>SLURM Scheduling</li>
<ul>
<li><a href="cons_res.html">Consumable Resources Guide</a></li>
+<li><a href="elastic_computing.html">Elastic Computing</a></li>
<li><a href="gang_scheduling.html">Gang Scheduling</a></li>
<li><a href="gres.html">Generic Resource (GRES) Scheduling</a></li>
<li><a href="high_throughput.html">High Throughput Computing Guide</a></li>
@@ -93,6 +96,6 @@
</li>
</ul>
-<p style="text-align:center;">Last modified 31 May 2011</p>
+<p style="text-align:center;">Last modified 15 May 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/download.shtml b/doc/html/download.shtml
index 5b06b50..fbc60d6 100644
--- a/doc/html/download.shtml
+++ b/doc/html/download.shtml
@@ -94,6 +94,11 @@
<li><b>Interconnect</b> plugins (Switch plugin)</li>
<ul>
+<li><b>Infiniband</b><br>
+The <b>topology.conf</b> file for an Infiniband switch can be automatically
+generated using the <b>ib2slurm</b> tool found here:
+<a href="https://github.com/fintler/ib2slurm">https://github.com/fintler/ib2slurm</a>.
+
<li><b>QsNet</b><br>
In order to build the "switch/elan" plugin for SLURM, you will need
the <b>qsnetlibs</b> development libraries from
@@ -149,6 +154,9 @@
<ul>
<li><a href="http://www.sdsc.edu/catalina/">Catalina</a> a scheduler supporting the
<a href="http://www.ogf.org/">Open Grid Forum</a> Advance Reservation API</li>
+<li><a href="http://web.mit.edu/star/cluster/docs/latest/index.html">StarCluster</a>
+cloud computing toolkit has a
+<a href="https://github.com/jlafon/StarCluster">SLURM port available</a>.
<li><a href="http://www.platform.com/">Load Sharing Facility (LSF)</a></li>
<li><a href="http://www.clusterresources.com/pages/products/maui-cluster-scheduler.php">
Maui Scheduler</a></li>
@@ -167,6 +175,13 @@
available at <a href="http://www.nsc.liu.se/~kent/python-hostlist/">
http://www.nsc.liu.se/~kent/python-hostlist/</a>.</li>
+<!--
+This directly communicates with the database, so it is something that we
+probably do not want to recommend.
+<li><a href="http://pypi.python.org/pypi/slurchemy">slurchemy</a> provides
+SQL Alchemy bindings for your slurmdbd database.</li>
+-->
+
<li><a href="http://www.lua.org/">Lua</a> may be used to implement a
SLURM process tracking plugin.
The Lua script available in <i>contribs/lua/protrack.lua</i>
@@ -203,6 +218,6 @@
</ul>
-<p style="text-align:center;">Last modified 24 May 2011</p>
+<p style="text-align:center;">Last modified 15 May 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/elastic_computing.shtml b/doc/html/elastic_computing.shtml
new file mode 100644
index 0000000..9c3a692
--- /dev/null
+++ b/doc/html/elastic_computing.shtml
@@ -0,0 +1,177 @@
+<!--#include virtual="header.txt"-->
+
+<h1><a name="top">SLURM Elastic Computing</a></h1>
+
+<h2>Overview</h2>
+
+<p>SLURM version 2.4 has the ability to support a cluster that grows and
+shrinks on demand, typically relying upon a service such as
+<a href="http://aws.amazon.com/ec2/">Amazon Elastic Computing Cloud (Amazon EC2)</a>
+for resources.
+These resources can be combined with an existing cluster to process excess
+workload (cloud bursting) or it can operate as an independent self-contained
+cluster.
+Good responsiveness and throughput can be achieved while you only pay for the
+resources needed.</p>
+
+<p>The
+<a href="http://web.mit.edu/star/cluster/docs/latest/index.html">StarCluster</a>
+cloud computing toolkit has a
+<a href="https://github.com/jlafon/StarCluster">SLURM port available</a>.
+<a href="https://github.com/jlafon/StarCluster/wiki/Getting-started-with-SLURM-on-Amazon's-EC2">
+Instructions</a> for the SLURM port of StarCluster are available online.</p>
+
+<p>The rest of this document describes details about SLURM's infrastructure that
+can be used to support Elastic Computing.</p>
+
+<p>SLURM's Elastic Computing logic relies heavily upon the existing power save
+logic.
+Review of SLURM's <a href="power_save.html">Power Saving Guide</a> is strongly
+recommended.
+This logic initiates programs when nodes are required for use and another
+program when those nodes are no longer required.
+For Elastic Computing, these programs will need to provision the resources
+from the cloud and notify SLURM of the node's name and network address and
+later relinquish the nodes back to the cloud.
+Most of the SLURM changes to support Elastic Computing were changes to
+support node addressing that can change.</p>
+
+<h2>SLURM Configuration</h2>
+
+<p>There are many ways to configure SLURM's use of resources.
+See the slurm.conf man page for more details about these options.
+Some general SLURM configuration parameters that are of interest include:
+<dl>
+<dt><b>ResumeProgram</b>
+<dd>The program executed when a node has been allocated and should be made
+available for use.
+<dt><b>SelectType</b>
+<dd>Generally must be "select/linear".
+If SLURM is configured to allocate individual CPUs to jobs rather than whole
+nodes (e.g. SelectType=select/cons_res rather than SelectType=select/linear),
+then SLURM maintains bitmaps to track the state of every CPU in the system.
+If the number of CPUs to be allocated on each node is not known when the
+slurmctld daemon is started, one must allocate whole nodes to jobs rather
+than individual processors.
+The use of "select/cons_res" requires each node to have a CPU count set and
+the node eventually selected must have at least that number of CPUs.
+<dt><b>SuspendExcNodes</b>
+<dd>Nodes not subject to suspend/resume logic. This may be used to avoid
+suspending and resuming nodes which are not in the cloud. Alternately the
+suspend/resume programs can treat local nodes differently from nodes being
+provisioned from cloud.
+<dt><b>SuspendProgram</b>
+<dd>The program executed when a node is no longer required and can be
+relinquished to the cloud.
+<dt><b>SuspendTime</b>
+<dd>The time interval that a node will be left idle before a request is made to
+relinquish it. Units are seconds.
+<dt><b>TreeWidth</b>
+<dd>Since the slurmd daemons are not aware of the network addresses of other
+nodes in the cloud, the slurmd daemons on each node should be sent messages
+directly and not forward those messages between each other. To do so,
+configure TreeWidth to a number at least as large as the maximum node count.
+The value may not exceed 65533.
+</dl>
+</p>
+
+<p>Some node parameters that are of interest include:
+<dl>
+<dt><b>Feature</b>
+<dd>A node feature can be associated with resources acquired from the cloud and
+user jobs can specify their preference for resource use with the "--constraint"
+option.
+<dt><b>NodeName</b>
+<dd>This is the name by which SLURM refers to the node. A name containing a
+numeric suffix is recommended for convenience. The NodeAddr and NodeHostname
+should not be set, but will be configured later using scripts.
+<dt><b>State</b>
+<dd>Nodes which are to be added on demand should have a state of "CLOUD".
+These nodes will not actually appear in SLURM commands until after they are
+configured for use.
+<dt><b>Weight</b>
+<dd>Each node can be configured with a weight indicating the desirability of
+using that resource. Nodes with lower weights are used before those with higher
+weights.
+</dl>
+</p>
+
+<p>Nodes to be acquired on demand can be placed into their own SLURM partition.
+This mode of operation can be used to use these nodes only if so requested by
+the user. Note that jobs can be submitted to multiple partitions and will use
+resources from whichever partition permits faster initiation.
+A sample configuration in which nodes are added from the cloud when the workload
+exceeds available resources. Users can explicitly request local resources or
+resources from the cloud by using the "--constraint" option.
+</p>
+
+<pre>
+# SLURM configuration
+# Excerpt of slurm.conf
+SelectType=select/linear
+
+SuspendProgram=/usr/sbin/slurm_suspend
+ResumeProgram=/usr/sbin/slurm_suspend
+SuspendTime=600
+SuspendExcNodes=tux[0-127]
+TreeWidth=128
+
+NodeName=tux[0-127] Weight=1 Feature=local State=UNKNOWN
+NodeName=ec[0-127] Weight=8 Feature=cloud State=CLOUD
+PartitionName=debug MaxTime=1:00:00 Nodes=tux[0-32] Default=yes
+PartitionName=batch MaxTime=8:00:00 Nodes=tux[0-127],ec[0-127] Default=no
+</pre>
+
+<h2>Operational Details</h2>
+
+<p>When the slurmctld daemon starts, all nodes with a state of CLOUD will be
+included in its internal tables, but these node records will not be seen with
+user commands or used by applications until allocated to some job. After
+allocated, the <i>ResumeProgram</i> is executed and should do the following:</p>
+<ol>
+<li>Boot the node</li>
+<li>Configure and start Munge (depends upon configuration)</li>
+<li>Install the SLURM configuration file, slurm.conf, on the node.
+Note that configuration file will generally be identical on all nodes and not
+include NodeAddr or NodeHostname configuration parameters for any nodes in the
+cloud.
+SLURM commands executed on this node only need to communicate with the
+slurmctld daemon on the ControlMachine.
+<li>Notify the slurmctld daemon of the node's hostname and network address:<br>
+<i>scontrol update nodename=ec0 nodeaddr=123.45.67.89 nodehostname=whatever</i><br>
+Note that the node address and hostname information set by the scontrol command
+are preserved when the slurmctld daemon is restarted unless the "-c"
+(cold-start) option is used.</li>
+<li>Start the slurmd daemon on the node</li>
+</ol>
+
+<p>The <i>SuspendProgram</i> only needs to relinquish the node back to the
+cloud.</p>
+
+<p>An environment variable SLURM_NODE_ALIASES contains sets of node name,
+communication address and hostname.
+The variable is set by salloc, sbatch, and srun.
+It is then used by srun to determine the destination for job launch
+communication messages.
+This environment variable is only set for nodes allocated from the cloud.
+If a job is allocated some resources from the local cluster and others from
+the cloud, only those nodes from the cloud will appear in SLURM_NODE_ALIASES.
+Each set of names and addresses is comma separated and
+the elements within the set are separated by colons. For example:<br>
+SLURM_NODE_ALIASES=ec0:123.45.67.8:foo,ec2:123.45.67.9:bar</p>
+
+<h2>Remaining Work</h2>
+
+<ul>
+<li>We need scripts to provision resources from EC2.</li>
+<li>The SLURM_NODE_ALIASES environment variable needs to change if a job
+expands (adds resources).</li>
+<li>Some MPI implementations will not work due to the node naming.</li>
+<li>Some tests in SLURM's test suite fail.</li>
+</ul>
+
+<p class="footer"><a href="#top">top</a></p>
+
+<p style="text-align:center;">Last modified 15 May 2012</p>
+
+<!--#include virtual="footer.txt"-->
diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml
index 37cd5bb..6923734 100644
--- a/doc/html/faq.shtml
+++ b/doc/html/faq.shtml
@@ -46,6 +46,8 @@
running?</a></li>
<li><a href="#mpi_symbols">Why is my MPIHCH2 or MVAPICH2 job not running with
SLURM? Why does the DAKOTA program not run with SLURM?</a></li>
+<li><a href="#estimated_start_time">Why does squeue (and "scontrol show
+jobid") sometimes not display a job's estimated start time?</a></li>
</ol>
<h2>For Administrators</h2>
@@ -134,6 +136,8 @@
core file?</a></li>
<li><a href="#totalview">How can TotalView be configured to operate with
SLURM?</a></li>
+<li><a href="#git_patch">How can a patch file be generated from a SLURM commit
+in github?</a></li>
</ol>
@@ -562,8 +566,25 @@
<i>"man limits.conf"</i>) and insuring that <i>slurmd</i> takes
full advantage of this limit (e.g. by adding something like
<i>"ulimit -l unlimited"</i> to the <i>/etc/init.d/slurm</i>
-script used to initiate <i>slurmd</i>).
+script used to initiate <i>slurmd</i>). It may also be desirable to lock
+the slurmd daemon's memory to help insure that it keeps responding if memory
+swapping begins. A sample <i>/etc/sysconfig/slurm</i> file is shown below.
Related information about <a href="#pam">PAM</a> is also available.</p>
+<pre>
+#
+# Example /etc/sysconfig/slurm
+#
+# Increase the memlock limit so that user tasks can get
+# unlimited memlock
+ulimit -l unlimited
+#
+# Increase the open file limit
+ulimit -n 8192
+#
+# Memlocks the slurmd process's memory so that if a node
+# starts swapping, the slurmd will continue to respond
+SLURMD_OPTIONS="-M"
+</pre>
<p><a name="inactive"><b>19. Why is my batch job that launches no
job steps being killed?</b></a><br>
@@ -797,6 +818,17 @@
Rename DAKOTA's function and references from regcomp to something else to make
it work properly.</p>
+<p><a name="estimated_start_time"><b>26. Why does squeue (and "scontrol show
+jobid") sometimes not display a job's estimated start time?</b></a><br>
+When the backfill scheduler is configured, it provides an estimated start time
+for jobs that are candidates for backfill. Pending jobs with dependencies
+will not have an estimate as it is difficult to predict what resources will
+be available when the jobs they are dependent on terminate. Also note that
+the estimate is better for jobs expected to start soon, as most running jobs
+end before their estimated time. There are other restrictions on backfill that
+may apply. See the <a href="#backfill">backfill</a> section for more details.
+</p>
+
<p class="footer"><a href="#top">top</a></p>
@@ -1008,9 +1040,10 @@
address of the physical node in <i>NodeHostname</i>. Multiple
<i>NodeName</i> values can be mapped to a single
<i>NodeHostname</i>. Note that each <i>NodeName</i> on a single
-physical node needs to be configured to use a different port number. You
+physical node needs to be configured to use a different port number
+(set <i>Port</i> to a unique value on each line for each node). You
will also want to use the "%n" symbol in slurmd related path options in
-slurm.conf. </li>
+slurm.conf (<i>SlurmdLogFile</i> and <i>SlurmdPidFile</i>). </li>
<li>When starting the <i>slurmd</i> daemon, include the <i>NodeName</i>
of the node that it is supposed to serve on the execute line (e.g.
"slurmd -N hostname").</li>
@@ -1082,7 +1115,9 @@
<i>CoresPerSocket</i>, <i>ThreadsPerCore</i>, and/or <i>TmpDisk</i>).
SLURM will use the resource specification for each node that is
given in <i>slurm.conf</i> and will not check these specifications
-against those actually found on the node.
+against those actually found on the node. The system would best be configured
+with <i>TaskPlugin=task/none</i>, so that launched tasks can run on any
+available CPU under operating system control.
<p><a name="credential_replayed"><b>16. What does a
"credential replayed"
@@ -1467,8 +1502,17 @@
force_env: false;
}
</pre></p>
+
+<p><a name="git_patch"><b>44. How can a patch file be generated from a SLURM
+commit in github?</b></a><br>
+<p>Find and open the commit in github then append ".patch" to the URL and save
+the resulting file. For an example, see:
+<a href="https://github.com/SchedMD/slurm/commit/91e543d433bed11e0df13ce0499be641774c99a3.patch">
+https://github.com/SchedMD/slurm/commit/91e543d433bed11e0df13ce0499be641774c99a3.patch</a>
+</p>
+
<p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 4 September 2011</p>
+<p style="text-align:center;">Last modified 24 June 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/gang_scheduling.shtml b/doc/html/gang_scheduling.shtml
index b52a959..1838bc1 100644
--- a/doc/html/gang_scheduling.shtml
+++ b/doc/html/gang_scheduling.shtml
@@ -69,6 +69,8 @@
<I>MaxMemPerNode</I> (maximum memory per allocated node) in <I>slurm.conf</I>.
Users can use the <I>--mem</I> or <I>--mem-per-cpu</I> option
at job submission time to specify their memory requirements.
+Note that in order to gang schedule jobs, all jobs must be able to fit into
+memory at the same time.
</LI>
<LI>
<B>JobAcctGatherType and JobAcctGatherFrequency</B>:
@@ -525,6 +527,6 @@
the selector and submitting appropriately sized jobs.
</P>
-<p style="text-align:center;">Last modified 24 June 2011</p>
+<p style="text-align:center;">Last modified 29 June 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/gres.shtml b/doc/html/gres.shtml
index b3c5535..e887954 100644
--- a/doc/html/gres.shtml
+++ b/doc/html/gres.shtml
@@ -43,7 +43,8 @@
The default value is set to the number of <B>File</B> values specified (if any),
otherwise the default value is one. A suffix of "K", "M" or "G" may be used
to mulitply the number by 1024, 1048576 or 1073741824 respectively
-(e.g. "Count=10G").</LI>
+(e.g. "Count=10G"). Note that Count is a 32-bit field and the maximum value
+is 4,294,967,295.</LI>
<LI><B>CPUs</B> Specify the CPU index numbers for the specific CPUs which can
use this resources. For example, it may be strongly preferable to use specific
@@ -150,6 +151,6 @@
file and insure they are in the increasing numeric order.</P>
<!-------------------------------------------------------------------------->
-<p style="text-align: center;">Last modified 8 May 2012</p>
+<p style="text-align: center;">Last modified 2 July 2012</p>
</body></html>
diff --git a/doc/html/gres_plugins.shtml b/doc/html/gres_plugins.shtml
index fad6b8a..a00c6b2 100644
--- a/doc/html/gres_plugins.shtml
+++ b/doc/html/gres_plugins.shtml
@@ -5,7 +5,7 @@
<h2> Overview</h2>
<p> This document describes SLURM generic resource plugins and the API that
defines them. It is intended as a resource to programmers wishing to write
-their own SLURM job submit plugins. This is version 100 of the API.
+their own SLURM job submit plugins. This is version 110 of the API.
<p>SLURM generic resource plugins must conform to the
SLURM Plugin API with the following specifications:
@@ -31,7 +31,7 @@
<p><span class="commandline">const uint32_t
plugin_version="<i>version_number</i>"</span><br>
<p style="margin-left:.2in">
-The version number should be 100.</p>
+The version number should be 110.</p>
<p>We include samples in the SLURM distribution for
<ul>
@@ -86,11 +86,29 @@
<span class="commandline">gres_ptr</span>
(input) pointer to the step's GRES allocation information.<br>
+<p class="commandline">
+extern void send_stepd(int fd)
+<p style="margin-left:.2in"><b>Description</b>:<br>
+This function is called by the <i>slurmd</i> daemon to send any needed
+information to the <i>slurmstepd</i> step shepherd.
+<p style="margin-left:.2in"><b>Arguments</b>: <br>
+<span class="commandline">fd</span>
+(input) file descriptor to write information to.<br>
+
+<p class="commandline">
+extern void recv_stepd(int fd)
+<p style="margin-left:.2in"><b>Description</b>:<br>
+This function is called by the <i>slurmstepd</i> step shepherd to read any
+needed information from the <i>slurmd</i> daemon.
+<p style="margin-left:.2in"><b>Arguments</b>: <br>
+<span class="commandline">fd</span>
+(input) file descriptor to read information from.<br>
+
<h2>Versioning</h2>
-<p> This document describes version 100 of the SLURM Generic Resource API.
+<p> This document describes version 110 of the SLURM Generic Resource API.
Future releases of SLURM may revise this API.
<p class="footer"><a href="#top">top</a>
-<p style="text-align:center;">Last modified 27 August 2010</p>
+<p style="text-align:center;">Last modified 1 February 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/header.txt b/doc/html/header.txt
index 98d4973..b021521 100644
--- a/doc/html/header.txt
+++ b/doc/html/header.txt
@@ -17,9 +17,21 @@
<body><div id="container">
<div id="banner">
-<img src="slurm_banner.gif" width="800" height="100" border="0" usemap="#Map">
+<img src="slurm_banner_schedmd.png" width="796" height="96" usemap="#Map">
</div>
+<div id="cse" style="width: 97%;">Loading</div>
+<script src="http://www.google.com/jsapi" type="text/javascript"></script>
+<script type="text/javascript">
+ google.load('search', '1', {language : 'en', style : google.loader.themes.V2_DEFAULT});
+ google.setOnLoadCallback(function() {
+ var customSearchOptions = {}; var customSearchControl = new google.search.CustomSearchControl(
+ '011612593158067573887:bmckasj55jw', customSearchOptions);
+ customSearchControl.setResultSetSize(google.search.Search.SMALL_RESULTSET);
+ customSearchControl.draw('cse');
+ }, true);
+</script>
+
<div id="navigation">
<h2><a href="slurm.shtml" class="nav">Home</a></h2>
diff --git a/doc/html/help.shtml b/doc/html/help.shtml
index 431174f..169ca05 100644
--- a/doc/html/help.shtml
+++ b/doc/html/help.shtml
@@ -5,7 +5,7 @@
<ol>
<li>See if the problem is addressed in the <a href="faq.html">SLURM FAQ</a>,
<a href="troubleshoot.html">SLURM Troubleshooting Guide</a> or the
-<a href="http://groups.google.com/group/slurm-devel">slurm-dev mailing list archive</a>.</li>
+<a href="http://dir.gmane.org/gmane.comp.distributed.slurm.devel">slurm-dev mailing list archive</a> (Hosted by <a href="http://www.gmane.org">www.gmane.org</a>).</li>
<li>For run-time problems, try running the command or daemons in verbose mode
(<span class="commandline">-v</span> option), and see if additional information
helps you resolve the problem.</li>
@@ -13,13 +13,13 @@
their support staff.</li>
<li>Send a detailed description of the problem, the output from the command
"scontrol show config", logs, back traces from any core files, etc. to
-<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</li>
+<a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a>.</li>
</ol>
<h1>Commercial Support</h1>
<p>Several companies provide commercial support for SLURM including
<a href="http://www.schedmd.com">SchedMD</a>.</p>
-<p style="text-align:center;">Last modified 30 June 2011</p>
+<p style="text-align:center;">Last modified 28 March 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/high_throughput.shtml b/doc/html/high_throughput.shtml
index 8f2d89d..b384866 100644
--- a/doc/html/high_throughput.shtml
+++ b/doc/html/high_throughput.shtml
@@ -52,6 +52,9 @@
<h2>SLURM Configuration</h2>
+<p>NOTE: Substantial changes were made in SLURM version 2.4 to support higher
+throughput rates. Version 2.5 includes more enhancements.</p>
+
<p>Several SLURM configuration parameters should be adjusted to
reflect the needs of high throughput computing.</p>
@@ -71,6 +74,9 @@
The record of jobs run will be preserved in accounting records and logs.
The default value is 300 seconds. The value should be reduced to a few
seconds if possible.</li>
+<li><b>PriorityType</b>:
+The <b>priority/builtin</b> is considerably faster than other options, but
+schedules jobs only on a First In First Out (FIFO) basis.</li>
<li><b>SchedulerParameters</b>:
Several scheduling parameters are available.
<ul>
@@ -91,16 +97,29 @@
overhead of backfill scheduling (NOTE: the default values are fine for both
of these parameters).</li>
</ul></li>
+<li><b>SelectType</b>:
+The <b>select/serial</b> plugin is highly optimized if executing only serial
+(single CPU) jobs.</li>
<li><b>SlurmctldPort</b>:
It is desirable to configure the <b>slurmctld</b> daemon to accept incoming
messages on more than one port in order to avoid having incoming messages
discarded by the operating system due to exceeding the SOMAXCONN limit
described above. Using between two and ten ports is suggested when large
numbers of simultaneous requests are to be supported.</li>
+<li><b>SlurmctldDebug</b>:
+More detailed logging will decrease system throughput. Set to 2 (log errors
+only) or 3 (general information logging). Each increment in the logging level
+will increase the number of message by a factor of about 3.</li>
+<li><b>SlurmdDebug</b>:
+More detailed logging will decrease system throughput. Set to 2 (log errors
+only) or 3 (general information logging). Each increment in the logging level
+will increase the number of message by a factor of about 3.</li>
+<li><b>SlurmdLogFile</b>:
+Writing to local storage is recommended.</li>
<li>Other: Configure logging, accounting and other overhead to a minimum
appropriate for your environment.</li>
</ul>
-<p style="text-align:center;">Last modified 30 August 2010</p>
+<p style="text-align:center;">Last modified 12 July 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/linuxstyles.css b/doc/html/linuxstyles.css
index 1b40d0c..aa76334 100644
--- a/doc/html/linuxstyles.css
+++ b/doc/html/linuxstyles.css
@@ -11,9 +11,9 @@
text-align:center;
}
-h1 { font-size: 130%; font-weight:bold; text-align:left; color: #002280; }
-h2 { font-size: 105%; font-weight:bold; text-align:left; color: #002280; }
-h3 { font-size: 100%; font-weight:bold; text-align:left; color: #002280; }
+h1 { font-size: 130%; font-weight:bold; text-align:left; color: #066cb2; }
+h2 { font-size: 105%; font-weight:bold; text-align:left; color: #066cb2; }
+h3 { font-size: 100%; font-weight:bold; text-align:left; color: #066cb2; }
p { font-size: 90%; }
p ul { font-size: 90%; }
ul {font-size: 90%;}
@@ -31,7 +31,7 @@
width: 800px;
padding:10px;
border:2px;
- border-color:#002280;
+ border-color:#066cb2;
border-style:solid;
background-color: #FFFFFF;
line-height:130%;
@@ -39,6 +39,9 @@
/* The Linux at Livermore banner graphic */
#banner {
+ border:2px;
+ border-color:#066cb2;
+ border-style:solid;
background-color: #FFFFFF;
align: left;
}
@@ -74,14 +77,14 @@
.pnav {
font-size: 95%;
font-weight: bold;
- color: #002280;
+ color: #066cb2;
}
#footer #left {
clear: both;
float:left;
margin: 0;
- background-color: #002280;
+ background-color: #066cb2;
width:400px;
text-align: left;
height: 22px;
@@ -91,7 +94,7 @@
margin: 0;
float:left;
width:400px;
- background-color: #002280;
+ background-color: #066cb2;
text-align: right;
height: 22px;
}
@@ -136,7 +139,7 @@
font-size: 70%;
font-weight: bold;
padding-top: 10px;
- color: #002280
+ color: #066cb2
}
.boldsubnav {
@@ -154,11 +157,11 @@
.smalltextblue {
font-size: 70%;
line-height:1;
- color: #002280;
+ color: #066cb2;
}
a:link {
- color: #002280;
+ color: #066cb2;
text-decoration: underline
}
@@ -173,20 +176,20 @@
}
a:active {
- color: #002280;
+ color: #066cb2;
text-decoration: underline
}
a.nav:link {
font-size: 95%;
- color: #002280;
+ color: #066cb2;
font-weight: bold;
text-decoration: none;
}
a.nav:visited {
font-size: 95%;
- color: #002280;
+ color: #066cb2;
font-weight: bold;
text-decoration: none;
}
@@ -198,32 +201,32 @@
}
a.nav:active {
font-size: 95%;
- color: #002280;
+ color: #066cb2;
font-weight: bold;
text-decoration: none;
}
a.footer:link {
font-size: 70%;
- color: #002280;
+ color: #066cb2;
text-decoration: none;
}
a.footer:visited {
font-size: 70%;
- color: #002280;
+ color: #066cb2;
text-decoration: none;
}
a.footer:hover {
font-size: 70%;
- color: #002280;
+ color: #066cb2;
text-decoration: underline;
}
a.footer:active {
font-size: 70%;
- color: #002280;
+ color: #066cb2;
text-decoration: none;
}
diff --git a/doc/html/mail.shtml b/doc/html/mail.shtml
index 6ea8674..62a0ae9 100644
--- a/doc/html/mail.shtml
+++ b/doc/html/mail.shtml
@@ -3,20 +3,24 @@
<h1>Mailing Lists</h1>
<p>We maintain two SLURM mailing lists:</p>
<ul>
-<li><b>slurm-announce</b> is designated for communications about SLURM releases
+<li><b>slurm-announce@schedmd.com</b> is designated for communications about SLURM releases
[low traffic].</li>
-<li><b>slurm-dev</b> is designated for communications to SLURM developers
+<li><b>slurm-dev@schedmd.com</b> is designated for communications to SLURM developers
[high traffic at times].</li>
</ul>
-<p>To subscribe to either list, send a message to
-<a href="mailto:majordomo@lists.llnl.gov">majordomo@lists.llnl.gov</a> with the body of the
-message containing the word "subscribe" followed by the list name and your e-mail address
-(if not the sender). For example: <br>
-<i>subscribe slurm-announce bob@yahoo.com</i></p>
+<p>To subscribe to either list, visit the link below, sign up and get involved!</p>
+<a href="http://lists.schedmd.com/cgi-bin/dada/mail.cgi/list">http://lists.schedmd.com/cgi-bin/dada/mail.cgi/list</a><p>
+You can likewise unsubscribe for either list at the same link.
+<p>
+NOTE: If you were on the old slurm-dev@lists.llnl.gov list you were
+automatically migrated into the new list.
+</p>
-The archive of the <b>slurm-dev</b> list is located here:<br>
-<a href="http://groups.google.com/group/slurm-devel">groups.google.com/group/slurm-devel</a>.
+<p>The archive of the <b>slurm-dev</b> list is online:<br>
+<a href="http://dir.gmane.org/gmane.comp.distributed.slurm.devel">http://dir.gmane.org/gmane.comp.distributed.slurm.devel</a><br>
+and<br>
+<a href="http://groups.google.com/group/slurm-devel">http://groups.google.com/group/slurm-devel</a></p>
-<p style="text-align:center;">Last modified 13 November 2009</p>
+<p style="text-align:center;">Last modified 28 March 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/meetings.shtml b/doc/html/meetings.shtml
index 7ee9c54..6d08002 100644
--- a/doc/html/meetings.shtml
+++ b/doc/html/meetings.shtml
@@ -2,19 +2,27 @@
<h1>Meetings</h1>
-<p><b>SLURM User Group Meeting 2011</b><br>
-September 22-23, 2011<br>
-Phoenix, Arizona, USA</p>
+<p><b>SLURM BoF at SC12</b><br>
+15 November 2012 12:15<br>
+Salt Lake City, UT</p>
-<p><a href="slurm_ug_cfp.html">Call for submissions</a><br>
-<a href="slurm_ug_registration.html">Registration</a><br>
-<a href="slurm_ug_agenda.html">Agenda</a><br>
+<p><b>SLURM User Group Meeting 2012</b><br>
+9-10 October 2012<br>
+Barcelona, Spain</p>
+
+<p><a href="http://www.bsc.es/SlurmUserMeeting2012">Call for submissions</a><br>
+<a href="http://www.bsc.es/SlurmUserMeeting2012">Registration</a><br>
+<a href="http://www.bsc.es/SlurmUserMeeting2012">Agenda</a><br>
</p>
+<!--<p><b>SLURM User Group Meeting 2011</b><br>
+September 22-23, 2011<br>
+Phoenix, Arizona, USA</p>-->
+
<!--<p><b>SLURM User Group Meeting 2010</b><br>
October 5, 2010<br>
Paris, France</p>-->
-<p style="text-align:center;">Last modified 23 May 2011</p>
+<p style="text-align:center;">Last modified 11 September 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/mpi_guide.shtml b/doc/html/mpi_guide.shtml
index e546f4a..e4de632 100644
--- a/doc/html/mpi_guide.shtml
+++ b/doc/html/mpi_guide.shtml
@@ -1,17 +1,16 @@
<!--#include virtual="header.txt"-->
-<h1>MPI Use Guide</h1>
+<h1>MPI and UPC Users Guide</h1>
<p>MPI use depends upon the type of MPI being used.
There are three fundamentally different modes of operation used
by these various MPI implementations.
<ol>
<li>SLURM directly launches the tasks and performs initialization
-of communications (Quadrics MPI, MPICH2, MPICH-GM, MPICH-MX,
-MVAPICH, MVAPICH2, some MPICH1 modes, and future versions of OpenMPI).</li>
+of communications (UPC, Quadrics MPI, MPICH2, MPICH-GM, MPICH-MX,
+MVAPICH, MVAPICH2, some MPICH1 modes, and OpenMPI version 1.5 or higher).</li>
<li>SLURM creates a resource allocation for the job and then
-mpirun launches tasks using SLURM's infrastructure (OpenMPI,
-LAM/MPI and HP-MPI).</li>
+mpirun launches tasks using SLURM's infrastructure (LAM/MPI and HP-MPI).</li>
<li>SLURM creates a resource allocation for the job and then
mpirun launches tasks using some mechanism other than SLURM,
such as SSH or RSH (BlueGene MPI and some MPICH1 modes).
@@ -34,6 +33,7 @@
<ul>
<li><a href="#bluegene_mpi">BlueGene MPI</a></li>
<li><a href="#hp_mpi">HP-MPI</a></li>
+<li><a href="#intel_mpi">Intel-MPI</a></li>
<li><a href="#lam_mpi">LAM/MPI</a></li>
<li><a href="#mpich1">MPICH1</a></li>
<li><a href="#mpich2">MPICH2</a></li>
@@ -43,6 +43,7 @@
<li><a href="#mvapich2">MVAPICH2</a></li>
<li><a href="#open_mpi">Open MPI</a></li>
<li><a href="#quadrics_mpi">Quadrics MPI</a></li>
+<li><a href="#UPC">UPC</a></li>
</ul></p>
<hr size=4 width="100%">
@@ -73,7 +74,7 @@
order to rectify the problem by cancelling the processes holding those
ports.</p>
-<h2>Older releases</h2>
+<h3>Version 1.4 or earlier</h3>
<p>Older versions of Open MPI and SLURM rely upon SLURM to allocate resources
for the job and then mpirun to initiate the tasks.
For example:
@@ -88,17 +89,78 @@
<hr size=4 width="100%">
-<h2><a name="quadrics_mpi" href="http://www.quadrics.com/"><b>Quadrics MPI</b></a></h2>
+<h2><a name="intel_mpi"><b>Intel MPI</b></a></h2>
-<p>Quadrics MPI relies upon SLURM to
-allocate resources for the job and <span class="commandline">srun</span>
-to initiate the tasks. One would build the MPI program in the normal manner
-then initiate it using a command line of this sort:</p>
+<p>Intel® MPI Library for Linux OS supports the following methods of
+launching the MPI jobs under the control of the SLURM job manager:</p>
+<ul>
+<li><a href="#intel_mpirun_mpd">The <i>mpirun</i> command over the MPD Process Manager (PM)</a></li>
+<li><a href="#intel_mpirun_hydra">The <i>mpirun</i> command over the Hydra PM</a></li>
+<li><a href="#intel_mpiexec_hydra">The <i>mpiexec.hydra</i> command (Hydra PM)</a></li>
+<li><a href="#intel_srun">The <i>srun</i> command (SLURM)</a></li>
+</ul>
+<p>This description provides detailed information on all of these methods.</p>
+
+<h3><a name="intel_mpirun_mpd">The mpirun Command over the MPD Process Manager</a></h3>
+<p>SLURM is supported by the <i>mpirun</i> command of the Intel® MPI Library 3.1
+Build 029 for Linux OS and later releases.</p>
+<p>When launched within a session allocated using the SLURM commands <i>sbatch</i> or
+<i>salloc</i>, the <i>mpirun</i> command automatically detects and queries certain SLURM
+environment variables to obtain the list of the allocated cluster nodes.</p>
+<p>Use the following commands to start an MPI job within an existing SLURM
+session over the MPD PM:</p>
<pre>
-$ srun [options] <program> [program args]
+<i>export I_MPI_PROCESS_MANAGER=mpd
+mpirun -n <num_procs> a.out</i>
</pre>
-<hr size=4 width="100%">
+<h3><a name="intel_mpirun_hydra">The mpirun Command over the Hydra Process Manager</a></h3>
+<p>SLURM is supported by the <i>mpirun</i> command of the Intel® MPI Library 4.0
+Update 3 through the Hydra PM by default. The behavior of this command is
+analogous to the MPD case described above.</p>
+<p>Use one of the following commands to start an MPI job within an existing
+SLURM session over the Hydra PM:</p>
+<pre>
+<i>mpirun -n <num_procs> a.out</i>
+</pre>
+<p>or</p>
+<pre>
+<i>mpirun -bootstrap slurm -n <num_procs> a.out</i>
+</pre>
+<p>We recommend that you use the second command. It uses the <i>srun</i> command
+rather than the default <i>ssh</i> based method to launch the remote Hydra PM
+service processes.</p>
+
+<h3><a name="intel_mpiexec_hydra">The mpiexec.hydra Command (Hydra Process Manager)</a></h3>
+<p>SLURM is supported by the Intel® MPI Library 4.0 Update 3 directly
+through the Hydra PM.</p>
+<p>Use the following command to start an MPI job within an existing SLURM session:</p>
+<pre>
+<i>mpiexec.hydra -bootstrap slurm -n <num_procs> a.out</i>
+</pre>
+
+<h3><a name="intel_srun">The srun Command (SLURM)</a></h3>
+<p>This advanced method is supported by the Intel® MPI Library 4.0 Update 3.
+Use the following commands to allocate a SLURM session and start an MPI job in
+it, or to start an MPI job within a SLURM session already created using the
+<i>sbatch</i> or <i>salloc</i> commands:</p>
+<ul>
+<li>Set the <i>I_MPI_PMI_LIBRARY</i> environment variable to point to the
+SLURM Process Management Interface (PMI) library:</li>
+<pre>
+<i>export I_MPI_PMI_LIBRARY=/path/to/slurm/pmi/library/libpmi.so</i>
+</pre>
+<li>Use the <i>srun</i> command to launch the MPI job:</li>
+<pre>
+<i>srun -n <num_procs> a.out</i>
+</pre>
+</ul>
+
+<p>Above information used by permission from <a href="http://www.intel.com">Intel</a>.
+For more information see
+<a href="http://software.intel.com/en-us/articles/intel-mpi-library/">Intel MPI Library</a>.
+
+<hr size=4 width="100%">
<h2><a name="lam_mpi" href="http://www.lam-mpi.org/"><b>LAM/MPI</b></a></h2>
@@ -246,8 +308,22 @@
<h2><a name="bluegene_mpi" href="http://www.research.ibm.com/bluegene/"><b>BlueGene MPI</b></a></h2>
-<p>BlueGene MPI relies upon SLURM to create the resource allocation and then
-uses the native <span class="commandline">mpirun</span> command to launch tasks.
+<p>All IBM BlueGene Systems rely upon SLURM to create a job's resource
+allocation, but the task launch mechanism differs by system type.</p>
+
+<h3>BlueGene/Q</h3>
+<p>The BlueGene/Q systems support the ability to allocate different portions of
+a BlueGene block to different users and different jobs, so SLURM must be
+directly involved in each task launch request.
+<b>The following is subject to change in order to support debugging.</b>
+In order to accomplish this, SLURM's srun command is executed to launch tasks.
+The srun command creates a job step allocation then invokes IBM's
+<span class="commandline">runjob</span> command to launch tasks within the
+allocated resources.</p>
+
+<h3>BlueGene/L and BlueGene/P</h3>
+<p>BlueGene/L and P MPI relies upon the native
+<span class="commandline">mpirun</span> command to launch tasks.
Build a job script containing one or more invocations of the
<span class="commandline">mpirun</span> command. Then submit
the script to SLURM using <span class="commandline">sbatch</span>.
@@ -321,6 +397,33 @@
tasks. These tasks are not managed by SLURM since they are launched
outside of its control.</p>
-<p style="text-align:center;">Last modified 15 October 2010</p>
+<hr size=4 width="100%">
+
+<h2><a name="quadrics_mpi" href="http://www.quadrics.com/"><b>Quadrics MPI</b></a></h2>
+
+<p>Quadrics MPI relies upon SLURM to
+allocate resources for the job and <span class="commandline">srun</span>
+to initiate the tasks. One would build the MPI program in the normal manner
+then initiate it using a command line of this sort:</p>
+<pre>
+$ srun [options] <program> [program args]
+</pre>
+
+<hr size=4 width="100%">
+
+<h2><a name="UPC" href="http://upc.lbl.gov/"><b>UPC (Unified Parallel C)</b></a></h2>
+
+<p>Berkeley UPC (and likely other UPC implementations) rely upon SLURM to
+allocate resources and launch the application's tasks. The UPC library then
+reads SLURM environment variables in order to determine the job's task
+count and location. One would build the UPC program in the normal manner
+then initiate it using a command line of this sort:</p>
+<pre>
+$ srun -N4 -n16 a.out
+</pre>
+
+<hr size=4 width="100%">
+
+<p style="text-align:center;">Last modified 9 April 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/mpiplugins.shtml b/doc/html/mpiplugins.shtml
index afffe04..62e3beb 100644
--- a/doc/html/mpiplugins.shtml
+++ b/doc/html/mpiplugins.shtml
@@ -16,9 +16,14 @@
The major type must be "mpi." The minor type can be any recognizable
abbreviation for the type of node selection algorithm. We recommend, for example:</p>
<ul>
-<li><b>lam</b>—For use with LAM MPI and Open MPI.</li>
-<li><b>mpich-gm</b>—For use with Myrinet.</li>
-<li><b>mvapich</b>—For use with Infiniband.</li>
+<li><b>lam</b>—For use with LAM MPI.</li>
+<li><b>mpich-gm</b>—For use with MPICH-GM.</li>
+<li><b>mpich-mx</b>—For use with MPICH-MX.</li>
+<li><b>mpich1_p4</b>—For use with MPICH1 (P4 communications).</li>
+<li><b>mpich1_shmem</b>—For use with MPICH1 (Shmem communications).</li>
+<li><b>mvapich</b>—For use with MVAPICH.</li>
+<li><b>openmpi</b>—For use with OpenMPI.</li>
+<li><b>pmi2</b>—For use with MPI2 and MVAPICH2.</li>
<li><b>none</b>—For use with most other versions of MPI.</li>
</ul>
<p>The <span class="commandline">plugin_name</span> and
@@ -42,6 +47,12 @@
<br>
which will set configure the slurmd to use the correct mpi as well to interact with the srun.
<br>
+slurmstepd process runs
+<br>
+<i>p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env);</i>
+<br>
+which executes immediately before fork/exec of tasks.
+<br>
<p class="footer"><a href="#top">top</a></p>
@@ -84,6 +95,17 @@
<p style="margin-left:.2in"><b>Returns</b>: false if multiple tasks can run and true if only
a single task can run at one time.</p>
+<p class="commandline">int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env);</p>
+<p style="margin-left:.2in"><b>Description</b>: Used by slurmstepd process immediately prior
+to fork and exec of user tasks.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> job</span> (input)
+Pointer to the slurmd structure for the job that is running.<br>
+<span class="commandline"> env</span> (input)
+Environment variables for tasks to be spawned.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure,
+the plugin should return -1.</p>
+
<p class="commandline">int mpi_p_exit();</p>
<p style="margin-left:.2in"><b>Description</b>: Cleans up anything that needs cleaning up after
execution.</p>
@@ -104,6 +126,6 @@
<p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 11 April 2006</p>
+<p style="text-align:center;">Last modified 3 April 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/news.shtml b/doc/html/news.shtml
index 5ebe074..f757d9e 100644
--- a/doc/html/news.shtml
+++ b/doc/html/news.shtml
@@ -6,8 +6,9 @@
<ul>
<li><a href="#21">SLURM Version 2.1, January 2010</a></li>
<li><a href="#22">SLURM Version 2.2, December 2010</a></li>
-<li><a href="#23">SLURM Version 2.3, planned for Summer 2011</a></li>
-<li><a href="#24">SLURM Version 2.4 and beyond</a></li>
+<li><a href="#23">SLURM Version 2.3, September 2011</a></li>
+<li><a href="#24">SLURM Version 2.4, planned June 2012</a></li>
+<li><a href="#25">SLURM Version 2.5 and beyond</a></li>
<li><a href="#security">Security Patches</a></li>
</ul>
@@ -54,7 +55,7 @@
</ul>
<h2><a name="23">Major Updates in SLURM Version 2.3</a></h2>
-<p>SLURM Version 2.3 release is planned for Summer 2011.
+<p>SLURM Version 2.3 was released in September 2011.
Major enhancements currently planned include:
<ul>
<li>Support for Cray XT and XE computers (integration with ALPS/BASIL).</li>
@@ -63,16 +64,30 @@
BlueGene architectures, improves performance and fault tolerance).</li>
<li>Support for Linux cgroup job containers including integration with
generic resources.</li>
-<li> Resource reservations with a node count specification will select
-those nodes optimized for the system topology.</li>
<li>Support for growing job allocations (support for shrinking jobs was added
in version 2.2).</li>
</ul>
-<h2><a name="24">Major Updates in SLURM Version 2.4 and beyond</a></h2>
+<h2><a name="24">Major Updates in SLURM Version 2.4</a></h2>
+<p>SLURM Version 2.4 release is planned for June 2012.
+Major enhancements currently planned include:
+<ul>
+<li>Major improvement in job throughput: Up to 500 jobs per second (actual
+throughput depends upon configuration, hardware, etc.).</li>
+<li>Support for BlueGene/Q computers (fully implemented).</li>
+<li>Resource reservations with a node count specification will select
+those nodes optimized for the system topology.</li>
+<li>Support for SLURM commands operating over a LoadLeveler resource manager.</li>
+<li>Support for job and step constraints with resource counts (e.g. a job
+request for two nodes with a feature of "Intel" plus four nodes with a feature
+of "AMD").</li>
+<li>New tool <i>sdiag</i> reports scheduling statistics.</li>
+</ul>
+
+<h2><a name="25">Major Updates in SLURM Version 2.5 and beyond</a></h2>
<p> Detailed plans for release dates and contents of additional SLURM releases
have not been finalized. Anyone desiring to perform SLURM development should
-notify <a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>
+notify <a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a>
to coordinate activities. Future development plans includes:
<ul>
<li>Faster and more powerful job step management support (e.g. step
@@ -111,6 +126,6 @@
unpredictable results. This was fixed in SLURM version 2.1.14.</li>
</ul>
-<p style="text-align:center;">Last modified 17 March 2011</p>
+<p style="text-align:center;">Last modified 10 May 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/platforms.shtml b/doc/html/platforms.shtml
index 55a888e..fa446fb 100644
--- a/doc/html/platforms.shtml
+++ b/doc/html/platforms.shtml
@@ -3,10 +3,17 @@
<h1>Platforms</h1>
<h2>Operating Systems</h2>
<ul>
-<li><b>AIX</b>—SLURM support for AIX has been thoroughly tested.</li>
+<li><b>AIX</b>—SLURM support for AIX has been thoroughly tested, but we
+know of no AIX installations using SLURM after 2008. IBM's Parallel Environment
+(PE) and Parallel Operating Environment (POE) are not currently supported, but
+are expected to be supported with Linux late in 2012. See
+<a href="ibm.html">IBM AIX User and Administrator Guide</a> for more information.</li>
<li><b>Linux</b>—SLURM has been thoroughly tested on most popular Linux
distributions using i386, ia64, and x86_64 architectures.</li>
-<li><b>OS X</b>—SLURM support for OS X is available.</li>
+<li><b>OS X</b>&mdash;SLURM has run on OS X in the past, but the current OS X linker
+does not support SLURM plugins. (OS X dynamically linked objects can be called by
+the main program. They may not call functions in the main program or other
+dynamically linked objects, which are features required by SLURM.)</li>
<li><b>Solaris</b>—SLURM support for Solaris (OpenSolaris build 119) was
added in version 2.1.</li>
<li><b>Other</b>—SLURM ports to other systems will be gratefully accepted.</li>
@@ -14,9 +21,13 @@
<h2>Interconnects</h2>
<ul>
<li><b>BlueGene</b>—SLURM support for IBM's BlueGene/L, BlueGene/P and
-BlueGene/Q systems has been thoroughly tested.</li>
+BlueGene/Q systems has been thoroughly tested. See
+<a href="bluegene.html">Blue Gene User and Administrator Guide</a>
+for more information.</li>
<li><b>Cray XT and XE</b>—Operates as a scheduler on top of Cray's
-ALPS/BASIL software. Supported added in SLURM version 2.3.</li>
+ALPS/BASIL software. Support added in SLURM version 2.3. Note that Cray's
+ALPS/BASIL software necessitates some changes in SLURM behavior. See
+<a href="cray.html">Cray User and Administrator Guide</a> for more information.</li>
<li><b>Ethernet</b>—Ethernet requires no special support from SLURM and has
been thoroughly tested.</li>
<li><b>IBM Federation</b>—SLURM support for IBM's Federation Switch
@@ -30,6 +41,6 @@
<li><b>Other</b>—SLURM ports to other systems will be gratefully accepted.</li>
</ul>
-<p style="text-align:center;">Last modified 8 April 2011</p>
+<p style="text-align:center;">Last modified 9 April 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/priority_multifactor.shtml b/doc/html/priority_multifactor.shtml
index 24a83dc..5e0d2cd 100644
--- a/doc/html/priority_multifactor.shtml
+++ b/doc/html/priority_multifactor.shtml
@@ -41,7 +41,7 @@
<DT> Fair-share
<DD> the difference between the portion of the computing resource that has been promised and the amount of resources that has been consumed
<DT> Job size
-<DD> the number of nodes a job is allocated
+<DD> the number of nodes or CPUs a job is allocated
<DT> Partition
<DD> a factor associated with each node partition
<DT> QOS
@@ -99,7 +99,13 @@
<a name=jobsize>
<h2>Job Size Factor</h2></a>
-<P> The job size factor correlates to the number of nodes the job has requested. This factor can be configured to favor larger jobs or smaller jobs based on the state of the <i>PriorityFavorSmall</i> boolean in the slurm.conf file. When <i>PriorityFavorSmall</i> is NO, the larger the job, the greater its job size factor will be. A job that requests all the nodes on the machine will get a job size factor of 1.0. When the <i>PriorityFavorSmall</i> Boolean is YES, the single node job will receive the 1.0 job size factor.</P>
+<P>The job size factor correlates to the number of nodes or CPUs the job has
+requested. This factor can be configured to favor larger jobs or smaller jobs
+based on the state of the <i>PriorityFavorSmall</i> boolean in the slurm.conf
+file. When <i>PriorityFavorSmall</i> is NO, the larger the job, the greater
+its job size factor will be. A job that requests all the nodes on the machine
+will get a job size factor of 1.0. When the <i>PriorityFavorSmall</i> Boolean
+is YES, the single node job will receive the 1.0 job size factor.</P>
<!-------------------------------------------------------------------------->
<a name=partition>
@@ -564,6 +570,6 @@
</PRE>
<!-------------------------------------------------------------------------->
-<p style="text-align:center;">Last modified 2 November 2010</p>
+<p style="text-align:center;">Last modified 27 September 2011</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/programmer_guide.shtml b/doc/html/programmer_guide.shtml
index ba3d1e2..c65af1f 100644
--- a/doc/html/programmer_guide.shtml
+++ b/doc/html/programmer_guide.shtml
@@ -154,6 +154,17 @@
<p class="footer"><a href="#top">top</a></p>
+<h2>Source Code Management</h2>
+<p>The latest code is in github:
+<a href="https://github.com/SchedMD/slurm">https://github.com/SchedMD/slurm</a>.
+Creating your own branch will make it easier to keep it synchronized
+with our work.</p>
+
+<h2>Compiling</h2>
+<p>Sending the standard output of "make" to a file makes it easier to see any
+warning or error messages:<br>
+<i>"make -j install >make.out"</i></p>
+
<h2>Configuration</h2>
<p>Sample configuration files are included in the <b>etc</b> subdirectory.
The <b>slurm.conf</b> can be built using a <a href="configurator.html">configuration tool</a>.
@@ -169,7 +180,7 @@
<p>We also have a set of Expect SLURM tests available under the <b>testsuite/expect</b>
directory. These tests are executed after SLURM has been installed
-and the daemons initiated. About 250 test scripts exercise all SLURM commands
+and the daemons initiated. About 320 test scripts exercise all SLURM commands
and options including stress tests. The file <b>testsuite/expect/globals</b>
contains default paths and procedures for all of the individual tests. At
the very least, you will need to set the <i>slurm_dir</i> variable to the correct
@@ -251,6 +262,6 @@
<p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 27 March 2009</p>
+<p style="text-align:center;">Last modified 23 April 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/publications.shtml b/doc/html/publications.shtml
index 4ada15b..999feba 100644
--- a/doc/html/publications.shtml
+++ b/doc/html/publications.shtml
@@ -7,6 +7,98 @@
<h2>Presentations</h2>
<ul>
+<!-- SLURM User Group Meeting 2012 -->
+<li><b>Presentations from SLURM User Group Meeting, October 2012</b></li>
+<ul>
+
+<li>Keynote: The OmSs Programming Model and its links to resource managers,
+Jesus Labarta, BSC</li>
+
+<li><a href="slurm_ug_2012/SUG_Oct2012_Status.pdf">SLURM Status Report</a>,
+Morris Jette and Danny Auble, SchedMD</li>
+
+<li>Site Report: BSC/RES,
+Alejandro Lucero and Carles Fenoy, BSC</li>
+
+<li>Site Report: CSCS,
+Stephen Trofinoff, CSCS</li>
+
+<li>Site Report: CEA,
+Matthieu Hautreux, CEA</li>
+
+<li>Site Report: CETA/CIEMAT,
+Alfonso Pardo Diaz, CIEMAT</li>
+
+<li>Porting SLURM to Bluegene/Q,
+Don Lipari, LLNL</li>
+
+<li>Tutorial: Configuration and use of SLURM database,
+Danny Auble, SchedMD</li>
+
+<li>Tutorial: How the SLURM scheduler works,
+Don Lipari, LLNL</li>
+
+<li>Tutorial: Cgroup support on SLURM,
+Yiannis Georgiou, Bull</li>
+
+<li>Tutorial: Kerberos and Auks with SLURM
+Matthieu Hautreux, CEA</li>
+
+<!-- day 2 -->
+
+<li>Keynote: Challenges in Evaluating Parallel Job Schedulers,
+Dror Feitelson, Hebrew University</li>
+
+<li><a href="slurm_ug_2012/SUG_Oct2012_PE.pdf">Integration of SLURM with IBM's Parallel Environment</a>,
+Morris Jette and Danny Auble, SchedMD</li>
+
+<li>SLURM Bank,
+Jimmy Tang, Trinity College, Dublin</li>
+
+<li>Using SLURM for Data Aware Scheduling in the Cloud,
+Martijn de Vries, Bright Computing</li>
+
+<li>Enhancing SLURM with Energy Consumption Monitoring and Control Features,
+Yiannis Georgiou, Bull</li>
+
+<li><a href="slurm_ug_2012/MapRedSLURM.pdf">MapReduce Support in SLURM: Releasing the Elephant</a>,
+Ralph H. Castain, et. al., Greenplum/EMC</li>
+
+<li>Using SLURM via Python,
+Mark Roberts (AWE) and Stephan Gorget (EDF)</li>
+
+<li><a href="slurm_ug_2012/SUG_Oct2012_HTC.pdf">High Throughput Computing with SLURM</a>,
+Morris Jette and Danny Auble, SchedMD</li>
+
+<li>Evaluating Scalability and Efficiency of SLURM on large HPC clusters,
+Yiannis Georgiou, Bull</li>
+
+<li>Integer Programming Based Heterogeneous CPU-GPU Clusters,
+Seren Soner, Bogazici University</li>
+
+<li>Job Resource Utilization as a Metric for Clusters Comparison and Optimization,
+Joseph Emeras, INRIA/LIG</li>
+
+</ul>
+
+<li><a href="pdfs/LCS_cgroups_BULL.pdf">Resource Management with Linux Control Groups in HPC Clusters</a>
+Yiannis Georgiou, Bull
+(6th Linux Collaboration Summit, April 2012)</li>
+
+<li><b>Presentations from SLURM Birds Of a Feather,
+SuperComputing 2011, November 2011</b></li>
+<ul>
+<li><a href="SC11_BOF/SchedMD_BOF_SC11.pdf">SLURM Version 2.3 and Beyond</a>
+Morris Jette, SchedMD LLC</li>
+
+<li><a href="SC11_BOF/BULL_BOF_SC11.pdf">Bull's SLURM Roadmap</a>
+Eric Monchalin, Bull</li>
+
+<li><a href="SC11_BOF/Bright_BOF_SC11.pdf">Cloud Bursting with SLURM and Bright Cluster Manager</a>
+Martijn de Vries, Bright Computing</li>
+</ul>
+
+<!-- SLURM User Group Meeting 2011 -->
<li><b>Presentations from SLURM User Group Meeting, September 2011</b></li>
<ul>
@@ -79,10 +171,6 @@
</li>
</ul>
-<li><a href="http://mescal.imag.fr/membres/yiannis.georgiou/publications/thesis_Georgiou-2010-UJF.pdf">
-Contributions For Resource and Job Management in High Performance Computing</a>,
-Yiannis Georgiou, Universite de Grenoble, France (Thesis, November 2010)</li>
-
<!-- Use LLNL-PRES-461787 -->
<li><a href="pdfs/slurm_sc10_bof.pdf">SLURM Version 2.2: Features and Release Plans</a>,
Morris Jette, Danny Auble and Donald Lipari, Lawrence Livermore National Laboratory
@@ -168,6 +256,11 @@
<h2>Publications</h2>
<ul>
+
+<li>GreenSpot: Scheduling Energy Consumption in Green Datacenters,
+Inigo Goiri, et. al.
+(SuperComputing 2011, November 2011)</li>
+
<li><a href="http://mescal.imag.fr/membres/yiannis.georgiou/publications/thesis_Georgiou-2010-UJF.pdf">
<b>Contributions for Resource and Job Management in High Performance Computing</b></a>,
Yiannis Georgiou, Universite Joseph Fourier
@@ -206,6 +299,12 @@
Brock Palen and Jeff Squyres speak with Morris Jette and
Danny Auble of LLNL about SLURM.</p>
-<p style="text-align:center;">Last modified 12 April 2011</p>
+<h2>Other Resources</h2>
+
+<p><a href="http://blog.ajdecon.org/learning-chef-compute-cluster-with-slurm">
+Learning Chef: Compute Cluster with SLURM</a>
+A SLURM Cookbook by Adam DeConinck</p>
+
+<p style="text-align:center;">Last modified 3 October 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/qos.shtml b/doc/html/qos.shtml
index eeaa8a0..013fbdc 100644
--- a/doc/html/qos.shtml
+++ b/doc/html/qos.shtml
@@ -10,6 +10,7 @@
<li> <a href=#priority>Job Scheduling Priority</a>
<li> <a href=#preemption>Job Preemption</a>
<li> <a href=#limits>Job Limits</a>
+<li> <a href=#qos_other>Other QOS Options</a>
</ul>
<P> The QOS's are defined in the SLURM database using the <i>sacctmgr</i>
@@ -74,22 +75,77 @@
QOS</P>
<UL>
-<LI><b>GrpCPUMins</b> Maximum number of CPU*minutes all jobs with this QOS can run.
-<LI><b>MaxCPUMinsPerJob</b> Maximum number of CPU*minutes any job with this QOS can run.
<LI><b>GrpCpus</b> Maximum number of CPU's all jobs with this QOS can be allocated.
-<LI><b>MaxCpusPerJob</b> Maximum number of CPU's any job with this QOS can be allocated.
-<LI><b>MaxCpusPerUser</b> Maximum number of CPU's any user with this QOS can be allocated.
+<LI><b>GrpCPUMins</b> A hard limit of cpu minutes to be used by jobs
+ running from this QOS. If this limit is reached all jobs running in
+ this group will be killed, and no new jobs will be allowed to run.
+<LI><b>GrpCPURunMins</b> Maximum number of CPU minutes all jobs
+ running with this QOS can run at the same time. This takes into
+ consideration time limit of running jobs. If the limit is reached
+ no new jobs are started until other jobs finish to allow time to
+ free up.
<LI><b>GrpJobs</b> Maximum number of jobs that can run with this QOS.
-<LI><b>MaxJobsPerUser</b> Maximum number of jobs a user can run with this QOS.
+<LI><b>GrpMemory</b> Maximum amount of memory (MB) all jobs with this QOS can be allocated.
<LI><b>GrpNodes</b> Maximum number of nodes that can be allocated to all jobs with this QOS.
-<LI><b>MaxNodesPerJob</b> Maximum number of nodes that can be allocated to any job with this QOS.
-<LI><b>MaxNodesPerUser</b> Maximum number of nodes that can be allocated to any user with this QOS.
<LI><b>GrpSubmitJobs</b> Maximum number of jobs with this QOS that can be in the system (no matter what state).
-<LI><b>MaxSubmitJobsPerUser</b> Maximum number of jobs with this QOS that can be in the system.
<LI><b>GrpWall</b> Wall clock limit for all jobs running with this QOS.
+<LI><b>MaxCpusPerJob</b> Maximum number of CPU's any job with this QOS can be allocated.
+<LI><b>MaxCPUMinsPerJob</b> Maximum number of CPU*minutes any job with this QOS can run.
+<LI><b>MaxNodesPerJob</b> Maximum number of nodes that can be allocated to any job with this QOS.
<LI><b>MaxWallDurationPerJob</b> Wall clock limit for any jobs running with this QOS.
+<LI><b>MaxCpusPerUser</b> Maximum number of CPU's any user with this QOS can be allocated.
+<LI><b>MaxJobsPerUser</b> Maximum number of jobs a user can run with this QOS.
+<LI><b>MaxNodesPerUser</b> Maximum number of nodes that can be allocated to any user with this QOS.
+<LI><b>MaxSubmitJobsPerUser</b> Maximum number of jobs with this QOS that can be in the system.
</UL>
+<a name=qos_other>
+<h2>Other QOS Options</h2></a>
+<ul>
+<li><b>Flags</b> Used by the slurmctld to override or enforce certain
+ characteristics. Valid options are
+
+<ul>
+<li><b>EnforceUsageThreshold</b> If set, and the QOS also has a UsageThreshold,
+any jobs submitted with this QOS that fall below the UsageThreshold
+will be held until their Fairshare Usage goes above the Threshold.
+
+<li><b>NoReserve</b> If this flag is set and backfill scheduling is used,
+jobs using this QOS will not reserve resources in the backfill
+schedule's map of resources allocated through time. This flag is
+intended for use with a QOS that may be preempted by jobs associated
+with all other QOS (e.g. use with a "standby" QOS). If this flag is
+used with a QOS which can not be preempted by all other QOS, it could
+result in starvation of larger jobs.
+
+<li><b>PartitionMaxNodes</b> If set jobs using this QOS will be able to
+override the requested partition's MaxNodes limit.
+
+<li><b>PartitionMinNodes</b> If set jobs using this QOS will be able to
+override the requested partition's MinNodes limit.
+
+<li><b>PartitionTimeLimit</b> If set jobs using this QOS will be able to
+override the requested partition's TimeLimit.
+
+<li><b>RequiresReservation</b> If set jobs using this QOS must designate a
+reservation when submitting a job. This option can be useful in
+restricting usage of a QOS that may have greater preemptive capability
+or additional resources to be allowed only within a reservation.
+</ul>
+
+<li><b>GraceTime</b> Preemption grace time to be extended to a job
+ which has been selected for preemption.
+<li><b>UsageFactor</b> Usage factor when running with this QOS
+ (i.e. .5 would make it use only half the time as normal in
+ accounting and 2 would make it use twice as much.)
+<li><b>UsageThreshold</b>
+A float representing the lowest fairshare of an association allowable
+to run a job. If an association falls below this threshold and has
+pending jobs or submits new jobs those jobs will be held until the
+usage goes back above the threshold. Use <i>sshare</i> to see current
+shares on the system.
+</ul>
+
<h2>Configuration</h2>
<P> To summarize the above, the QOS's and their associated limits are
diff --git a/doc/html/quickstart.shtml b/doc/html/quickstart.shtml
index 2454906..f7b33e9 100644
--- a/doc/html/quickstart.shtml
+++ b/doc/html/quickstart.shtml
@@ -373,6 +373,7 @@
<ul>
<li><a href="mpi_guide.html#bluegene_mpi">BlueGene MPI</a></li>
<li><a href="mpi_guide.html#hp_mpi">HP-MPI</a></li>
+<li><a href="mpi_guide.html#intel_mpi">Intel MPI</a></li>
<li><a href="mpi_guide.html#lam_mpi">LAM/MPI</a></li>
<li><a href="mpi_guide.html#mpich1">MPICH1</a></li>
<li><a href="mpi_guide.html#mpich2">MPICH2</a></li>
@@ -384,6 +385,6 @@
<li><a href="mpi_guide.html#quadrics_mpi">Quadrics MPI</a></li>
</ul></p>
-<p style="text-align:center;">Last modified 26 February 2009</p>
+<p style="text-align:center;">Last modified 24 February 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/quickstart_admin.shtml b/doc/html/quickstart_admin.shtml
index adc47e2..347ec33 100644
--- a/doc/html/quickstart_admin.shtml
+++ b/doc/html/quickstart_admin.shtml
@@ -74,15 +74,15 @@
</p>
<ul>
<li> <b>MUNGE</b> The auth/munge plugin will be built if the MUNGE authentication
- library is installed. MUNGE is used as the default
- authentication mechanism.</li>
+ library is installed. MUNGE is used as the default
+ authentication mechanism.</li>
<li> <b>Authd</b> The auth/authd plugin will be built and installed if
- the libauth library and its dependency libe are installed.
+ the libauth library and its dependency libe are installed.
</li>
<li> <b>Federation</b> The switch/federation plugin will be built and installed
if the IBM Federation switch library is installed.
<li> <b>QsNet</b> support in the form of the switch/elan plugin requires
- that the qsnetlibs package (from Quadrics) be installed along
+ that the qsnetlibs package (from Quadrics) be installed along
with its development counterpart (i.e. the qsnetheaders
package.) The switch/elan plugin also requires the
presence of the libelanosts library and /etc/elanhosts
@@ -103,7 +103,7 @@
"ptrack" kernel patch is required for process
tracking.
<li> <b>sview</b> The sview command will be built only if and <i>gtk+-2.0</i>
- is installed</li>
+ is installed</li>
</ul>
Please see the <a href=download.html>Download</a> page for references to
required software to build these plugins.</p>
@@ -369,7 +369,8 @@
for the host "mcri". Port numbers to be used for
communications are specified as well as various timer values.</p>
-<p>The <i>SlurmUser</i> must be created as needed prior to starting SLURM.
+<p>The <i>SlurmUser</i> must be created as needed prior to starting SLURM
+and must exist on all nodes in your cluster.
The parent directories for SLURM's log files, process ID files,
state save directories, etc. are not created by SLURM.
They must be created and made writable by <i>SlurmUser</i> as needed prior to
@@ -668,11 +669,9 @@
<p>If the SlurmDBD daemon is used, it must be at the same or higher minor
release number as the Slurmctld daemons.
In other words, when changing the version to a higher release number (e.g
-from 2.0 to 2.1) <b>always upgrade the SlurmDBD daemon first</b>.
-There is no need to upgrade the SlurmDBD daemon when performing a n update
-at the micro level (e.g. from 2.1.0 to 2.1.1).</p>
+from 2.0 to 2.1) <b>always upgrade the SlurmDBD daemon first</b>.</p>
-<p>When upgrading to a new major or minor release of SLURM <u>prior to version
+<p>When upgrading to a new major or minor release of SLURM <u>prior to version
2.2</u> (e.g. 2.0.x to 2.1.x) all running and pending jobs will be purged due to
changes in state save information.
When upgrading to a new micro release of SLURM (e.g. 2.1.1 to 2.1.2) all
@@ -689,6 +688,6 @@
</pre> <p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 9 November 2010</p>
+<p style="text-align:center;">Last modified 19 March 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/reservations.shtml b/doc/html/reservations.shtml
index f94179d..07ddaf9 100644
--- a/doc/html/reservations.shtml
+++ b/doc/html/reservations.shtml
@@ -74,7 +74,7 @@
license from being scheduled on this cluster during this reservation.</p>
<pre>
$ scontrol create reservation starttime=2009-04-06T16:00:00 \
- duration=120 user=root flags=maint,ignore_jobs \
+ duration=120 user=root flags=ignore_jobs \
licenses=lustre*1000
Reservation created: root_4
@@ -83,7 +83,7 @@
EndTime=2009-04-06T18:00:00 Duration=120
Nodes= NodeCnt=0
Features=(null) PartitionName=(null)
- Flags=MAINT,SPEC_NODES,IGNORE_JOBS Licenses=lustre*1000
+ Flags=IGNORE_JOBS Licenses=lustre*1000
Users=root Accounts=(null)
</pre>
@@ -124,6 +124,29 @@
Users=alan,brenda Accounts=(null)
</pre>
+<p>Reservations can be optimized with respect to system topology if the
+reservation request includes information about the sizes of jobs to be created.
+This is especially important for BlueGene systems due to restrictive rules
+about the topology of created blocks (due to hardware constraints and/or
+SLURM's configuration). To take advantage of this optimization, specify the
+sizes of jobs to be concurrently executed. The example below creates a
+reservation containing 4096 c-nodes on a BlueGene system so that two 2048
+c-node jobs can execute simultaneously.</p>
+
+<pre>
+$ scontrol create reservation user=alan,brenda \
+ starttime=noon duration=60 nodecnt=2k,2k
+Reservation created: alan_8
+
+$ scontrol show res
+ReservationName=alan_9 StartTime=2011-12-05T12:00:00
+ EndTime=2011-12-05T13:00:00 Duration=60
+ Nodes=bgp[000x011,210x311] NodeCnt=4096
+ Features=(null) PartitionName=pdebug
+ Flags= Licenses=(null)
+ Users=alan,brenda Accounts=(null)
+</pre>
+
<p>Note that specific nodes to be associated with the reservation are
made immediately after creation of the reservation. This permits
users to stage files to the nodes in preparation for use during the
@@ -148,7 +171,7 @@
Licenses = cpu*64
$ scontrol create reservation starttime=2009-04-06T16:00:00 \
- duration=120 user=bob flags=maint,ignore_jobs \
+ duration=120 user=bob flags=ignore_jobs \
licenses=cpu*32
Reservation created: bob_5
</pre>
@@ -278,7 +301,7 @@
nodes be reserved (work around described above).</li>
</ol>
-<p style="text-align: center;">Last modified 15 September 2011</p>
+<p style="text-align: center;">Last modified 31 January 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/resource_limits.shtml b/doc/html/resource_limits.shtml
index 49f7592..cdd75cd 100644
--- a/doc/html/resource_limits.shtml
+++ b/doc/html/resource_limits.shtml
@@ -133,6 +133,12 @@
run after previous jobs complete from this group.
</li>
+<li><b>GrpMemory=</b> The total amount of memory (MB) able to be used
+ at any given time from jobs running from this association and its
+ children. If this limit is reached new jobs will be queued but only
+ allowed to run after resources have been relinquished from this group.
+</li>
+
<li><b>GrpNodes=</b> The total count of nodes able to be used at any given
time from jobs running from this association and its children. If
this limit is reached new jobs will be queued but only allowed to
diff --git a/doc/html/selectplugins.shtml b/doc/html/selectplugins.shtml
index 104672b..dcc1fac 100644
--- a/doc/html/selectplugins.shtml
+++ b/doc/html/selectplugins.shtml
@@ -61,7 +61,7 @@
/* leave the job queued for later execution */
else {
while (!<i>select_p_job_ready</i>())
- wait
+ wait
/* execute the job */
/* wait for job to end or be terminated */
<i>select_p_job_fini</i>()
@@ -441,7 +441,7 @@
Call xfree() to release the memory allocated for the return value.</p>
<p class="commandline">int select_p_job_test (struct job_record *job_ptr,
-bitstr_t *bitmap, int min_nodes, int max_nodes, int req_nodes, int mode,
+bitstr_t *bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes, uint32_t mode,
List preemption_candidates, List *preempted_jobs);</p>
<p style="margin-left:.2in"><b>Description</b>: Given a job's scheduling requirement
specification and a set of nodes which might be used to satisfy the request, identify
@@ -640,6 +640,15 @@
<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure,
the plugin should return SLURM_ERROR.</p>
+<p class="commandline">int select_p_fail_cnode(struct step_record *step_ptr);</p>
+<p style="margin-left:.2in"><b>Description</b>: This function fails
+ certain cnodes in a block's midplane.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline">step_ptr</span>  (input)
+information on the step that has failed cnodes.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure,
+the plugin should return SLURM_ERROR.</p>
+
<p class="footer"><a href="#top">top</a></p>
@@ -664,7 +673,7 @@
<h3>Get Information Functions</h3>
-<p class="commandline">int select_p_get_info_from_plugin(enum select_data_info info,
+<p class="commandline">int select_p_get_info_from_plugin(enum select_plugindata_info dinfo,
struct job_record *job_ptr, void *data);</p>
<p style="margin-left:.2in"><b>Description</b>: Get plugin-specific information
about a job.</p>
@@ -701,14 +710,9 @@
<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure,
the plugin should return SLURM_ERROR.</p>
-<p class="commandline">int *select_p_ba_get_dims(void);</p>
-<p style="margin-left:.2in"><b>Description</b>: Return an array containing
-the number of elements in each dimension of the system size. For example, an IBM
-Bluegene/P system has a three-dimensional torus topology. If it has eight elements
-in the X dimension, and four in the Y and Z dimensions, the returned array will
-contain the values 8, 4, 4.</p>
-<p style="margin-left:.2in"><b>Returns</b>: An array containing the number of
-elements in each dimension of the system size.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>Block Allocator interface</h3>
<p class="commandline">void select_p_ba_init(node_info_msg_t *node_info_ptr, bool sanity_check);</p>
<p style="margin-left:.2in"><b>Description</b>: Construct an internal block allocation
@@ -725,6 +729,15 @@
<p style="margin-left:.2in"><b>Description</b>: Free storage allocated by
select_p_ba_init().</p>
+<p class="commandline">int *select_p_ba_get_dims(void);</p>
+<p style="margin-left:.2in"><b>Description</b>: Return an array containing
+the number of elements in each dimension of the system size. For example, an IBM
+Bluegene/P system has a three-dimensional torus topology. If it has eight elements
+in the X dimension, and four in the Y and Z dimensions, the returned array will
+contain the values 8, 4, 4.</p>
+<p style="margin-left:.2in"><b>Returns</b>: An array containing the number of
+elements in each dimension of the system size.</p>
+
<p class="footer"><a href="#top">top</a></p>
@@ -738,6 +751,6 @@
<p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 3 August 2011</p>
+<p style="text-align:center;">Last modified 12 June 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/slurm.shtml b/doc/html/slurm.shtml
index 1a0df20..0f16456 100644
--- a/doc/html/slurm.shtml
+++ b/doc/html/slurm.shtml
@@ -31,8 +31,8 @@
<p>While other resource managers do exist, SLURM is unique in several
respects:
<ul>
-<li>It is designed to operate in a heterogeneous cluster with up to 65,536 nodes
-and hundreds of thousands of processors.</li>
+<li>It is designed to operate in a heterogeneous cluster with up to millions of
+processors (2.1 million sockets scheduled on an emulated IBM BlueGene/Q system).</li>
<li>It can sustain a throughput rate of over 120,000 jobs per hour with
bursts of job submissions at several times that rate.</li>
<li>Its source code is freely available under the
@@ -50,19 +50,38 @@
<p>SLURM provides resource management on many of the most powerful computers in
the world including:
<ul>
+<li><a href="https://asc.llnl.gov/computing_resources/sequoia/">Sequoia</a>,
+a BlueGene/Q system at <a href="https://www.llnl.gov">LLNL</a>
+with 1.6 petabytes of memory, 96 racks, 98,304 compute nodes, and 1.6
+ million cores, with a peak performance of over 20 Petaflops.</li>
+
<li><a href="http://www.nytimes.com/2010/10/28/technology/28compute.html?_r=1&partner=rss&emc=rss">
Tianhe-1A</a> designed by
<a href="http://english.nudt.edu.cn">The National University of Defence Technology (NUDT)</a>
in China with 14,336 Intel CPUs and 7,168 NVDIA Tesla M2050 GPUs, with a peak performance of 2.507 Petaflops.</li>
+<li><a href="http://www-hpc.cea.fr/en/complexe/tgcc-curie.htm">TGCC
+ Curie</a>, owned by GENCI and operated into the TGCC by CEA, Curie
+ is offering 3 different fractions of x86-64 computing resources
+ for addressing a wide range of scientific challenges and offering
+ an aggregate peak performance of 2 PetaFlops.</li>
+
<li><a href="http://www.wcm.bull.com/internet/pr/rend.jsp?DocId=567851&lang=en">
Tera 100</a> at <a href="http://www.cea.fr">CEA</a>
with 140,000 Intel Xeon 7500 processing cores, 300TB of
-central memory and a theoretical computing power of 1.25 Petaflops. Europe's
-most powerful supercomputer.</li>
+central memory and a theoretical computing power of 1.25 Petaflops.</li>
+
+<li><a href="http://compeng.uni-frankfurt.de/index.php?id=86">LOEWE-CSC</a>,
+a combined CPU-GPU Linux cluster
+at <a href="http://csc.uni-frankfurt.de">The Center for Scientific
+Computing (CSC)</a> of the Goethe University Frankfurt, Germany,
+with 20,928 AMD Magny-Cours CPU cores (176 Teraflops peak
+performance) plus 778 ATI Radeon 5870 GPUs (2.1 Petaflops peak
+performance single precision and 599 Teraflops double precision) and
+QDR Infiniband interconnect.</li>
<li><a href="https://asc.llnl.gov/computing_resources/sequoia/">Dawn</a>,
-a BlueGene/P system at <a href=https://www.llnl.gov">LLNL</a>
+a BlueGene/P system at <a href="https://www.llnl.gov">LLNL</a>
with 147,456 PowerPC 450 cores with a peak
performance of 0.5 Petaflops.</li>
@@ -75,17 +94,8 @@
<li><a href="http://c-r-labs.com/">EKA</a> at Computational Research Laboratories,
India with 14,240 Xeon processors and Infiniband interconnect</li>
-<li><a href="http://www.bsc.es/plantillaA.php?cat_id=5">MareNostrum</a>
-a Linux cluster at the <a href="http://www.bsc.es">Barcelona Supercomputer Center</a>
-with 10,240 PowerPC processors and a Myrinet switch</li>
-
-<li><a href="http://en.wikipedia.org/wiki/Anton_(computer)">Anton</a>
-a massively parallel supercomputer designed and built by
-<a href="http://www.deshawresearch.com/">D. E. Shaw Research</a>
-for molecular dynamics simulation using 512 custom-designed ASICs
-and a three-dimensional torus interconnect.</li>
</ul>
-<p style="text-align:center;">Last modified 5 May 2011</p>
+<p style="text-align:center;">Last modified 26 November 2011</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/slurm_banner_schedmd.png b/doc/html/slurm_banner_schedmd.png
new file mode 100644
index 0000000..04ae3a9
--- /dev/null
+++ b/doc/html/slurm_banner_schedmd.png
Binary files differ
diff --git a/doc/html/slurm_ug_cfp.shtml b/doc/html/slurm_ug_cfp.shtml
index 3fa75ef..45212c8 100644
--- a/doc/html/slurm_ug_cfp.shtml
+++ b/doc/html/slurm_ug_cfp.shtml
@@ -22,7 +22,7 @@
<p>Everyone who wants to present their own usage, developments, site report,
or tutorial about SLURM is invited to send a short abstract including
the presentation's expected duration to
-<a href="mailto:slurm_user_group@lists.llnl.gov">slurm_user_group@lists.llnl.gov</a>.</p>
+<a href="mailto:slurm_user_group@schedmd.com">slurm_user_group@schedmd.com</a>.</p>
<p>IMPORTANT DATES:<br>
April 30, 2011: Submission of abstracts<br>
diff --git a/doc/html/slurm_ug_registration.shtml b/doc/html/slurm_ug_registration.shtml
index 339cfac..e560340 100644
--- a/doc/html/slurm_ug_registration.shtml
+++ b/doc/html/slurm_ug_registration.shtml
@@ -41,7 +41,7 @@
You should receive a confirmation of this initial stage within in a few days and
final confirmation by <b>August 26, 2011</b>.</p>
- <FORM METHOD=POST ENCTYPE="text/plain" ACTION="mailto:slurm_user_group@lists.llnl.gov?subject=SLURM User Group Registration">
+ <FORM METHOD=POST ENCTYPE="text/plain" ACTION="mailto:slurm_user_group@schedmd.com?subject=SLURM User Group Registration">
<PRE>
<table width="100%" border=0 cellspacing=0 cellpadding=0>
<tr>
diff --git a/doc/html/slurmstyles.css b/doc/html/slurmstyles.css
index 77f0e94..9ec6a8d 100644
--- a/doc/html/slurmstyles.css
+++ b/doc/html/slurmstyles.css
@@ -39,3 +39,568 @@
width: 90%;
}
}
+
+/**
+ * Default Theme, v2.
+ *
+ */
+/* Slight reset to make the preview have ample padding. */
+.cse .gsc-control-cse,
+.gsc-control-cse {
+ padding: 1em;
+ width: auto;
+}
+.cse .gsc-control-wrapper-cse,
+.gsc-control-wrapper-cse {
+ width: 100%;
+}
+.cse .gsc-branding,
+.gsc-branding {
+ display: none;
+}
+.cse .gsc-control-cse div,
+.gsc-control-cse div {
+ position: normal;
+}
+/* Selector for entire element. */
+.cse .gsc-control-cse,
+.gsc-control-cse {
+ background-color: #fff;
+ border: 1px solid #fff;
+}
+.cse .gsc-control-cse:after,
+.gsc-control-cse:after {
+ content:".";
+ display:block;
+ height:0;
+ clear:both;
+ visibility:hidden;
+}
+.cse .gsc-resultsHeader,
+.gsc-resultsHeader {
+ border: block;
+}
+table.gsc-search-box td.gsc-input {
+ padding-right: 24px;
+}
+
+.gsc-search-box-tools .gsc-search-box .gsc-input {
+ padding-right: 12px;
+}
+
+input.gsc-input {
+ font-size: 16px;
+ padding: 4px 9px;
+ border: 1px solid #D9D9D9;
+ width: 99%;
+}
+
+.gsc-input-box {
+ border: 1px solid #D9D9D9;
+ background: #fff;
+ height: 25px;
+}
+
+.gsc-search-box .gsc-input>input:hover,
+.gsc-input-box-hover {
+ border: 1px solid #b9b9b9;
+ border-top-color: #a0a0a0;
+ -moz-box-shadow: inset 0 1px 2px rgba(0,0,0,.1);
+ -webkit-box-shadow: inset 0 1px 2px rgba(0,0,0,.1);
+ box-shadow: inset 0 1px 2px rgba(0,0,0,.1);
+ outline: none;
+}
+.gsc-search-box .gsc-input>input:focus,
+.gsc-input-box-focus {
+ border: 1px solid #4d90fe;
+ -moz-box-shadow: inset 0 1px 2px rgba(0,0,0,.3);
+ -webkit-box-shadow: inset 0 1px 2px rgba(0,0,0,.3);
+ box-shadow: inset 0 1px 2px rgba(0,0,0,.3);
+ outline: none;
+}
+
+/* Search button */
+.cse input.gsc-search-button,
+input.gsc-search-button {
+ font-family: inherit;
+ font-size: 11px;
+ font-weight: bold;
+ color: #fff;
+ padding: 0 8px;
+ height: 29px;
+ min-width: 54px;
+
+ border: 1px solid #666666;
+ border-radius: 2px;
+ -moz-border-radius: 2px;
+ -webkit-border-radius: 2px;
+
+ border-color: #3079ed;
+ background-color: #4d90fe;
+ background-image: -webkit-gradient(linear,left top,left bottom,from(#4d90fe),to(#4787ed));
+ background-image: -webkit-linear-gradient(top,#4d90fe,#4787ed);
+ background-image: -moz-linear-gradient(top,#4d90fe,#4787ed);
+ background-image: -ms-linear-gradient(top,#4d90fe,#4787ed);
+ background-image: -o-linear-gradient(top,#4d90fe,#4787ed);
+ background-image: linear-gradient(top,#4d90fe,#4787ed);
+ filter: progid:DXImageTransform.Microsoft.gradient(startColorStr='#4d90fe',EndColorStr='#4787ed');
+}
+.cse input.gsc-search-button:hover,
+input.gsc-search-button:hover {
+ border-color: #2f5bb7;
+ background-color: #357ae8;
+ background-image: -webkit-gradient(linear,left top,left bottom,from(#4d90fe),to(#357ae8));
+ background-image: -webkit-linear-gradient(top,#4d90fe,#357ae8);
+ background-image: -moz-linear-gradient(top,#4d90fe,#357ae8);
+ background-image: -ms-linear-gradient(top,#4d90fe,#357ae8);
+ background-image: -o-linear-gradient(top,#4d90fe,#357ae8);
+ background-image: linear-gradient(top,#4d90fe,#357ae8);
+ filter: progid:DXImageTransform.Microsoft.gradient(startColorStr='#4d90fe',EndColorStr='#357ae8');
+}
+
+.cse input.gsc-search-button:focus,
+input.gsc-search-button:focus {
+ box-shadow:inset 0 0 0 1px rgba(255,255,255,0.5);
+ -webkit-box-shadow:inset 0 0 0 1px rgba(255,255,255,0.5);
+ -moz-box-shadow:inset 0 0 0 1px rgba(255,255,255,0.5);
+}
+
+.cse .gsc-search-button input.gsc-search-button-v2,
+input.gsc-search-button-v2 {
+ width: 13px;
+ height: 13px;
+ padding: 6px 27px;
+ min-width: 13px;
+ margin-top: 2px;
+}
+
+.gsc-refinementHeader {
+ text-decoration: none;
+ font-weight: bold;
+ color: #666;
+}
+
+.gsc-refinementHeader.gsc-refinementhActive {
+ text-decoration: none;
+ color: #DD4B39;
+}
+
+.gsc-refinementHeader.gsc-refinementhInactive {
+ text-decoration: none;
+ cursor: pointer;
+}
+
+.gsc-refinementHeader.gsc-refinementhInactive>span:hover {
+ text-decoration: underline;
+}
+
+.gsc-refinementhActive>span {
+ border-bottom: 3px solid;
+ padding-bottom: 2px;
+}
+
+.gsc-refinementsArea {
+ margin-top: 0;
+ padding-bottom: 4px;
+ padding-top: 10px;
+}
+
+/* Font size for refinements */
+.gsc-tabsArea {
+ font-size: 11px;
+}
+/* For searcher tabs */
+.gsc-tabsArea > .gsc-tabHeader {
+ height: 27px;
+}
+.gsc-tabsArea > div {
+ height: 30px;
+ overflow: auto;
+}
+/* No spacers needed for keneddy refinements */
+.gsc-tabsArea .gs-spacer {
+ display: none;
+}
+.gsc-tabsArea .gs-spacer-opera {
+ display: none;
+}
+.gsc-tabsArea {
+ margin-top: 12px;
+ margin-bottom: 0;
+ height: 29px;
+ border-bottom: 1px solid #CCC;
+}
+/* Refinement tab properties */
+.gsc-tabHeader {
+ display: inline-block;
+ padding: 0 8px 1px 8px;
+ margin-right: 0px;
+ margin-top: 0px;
+ font-weight: bold;
+ height: 27px;
+ line-height: 27px;
+ min-width: 54px;
+ text-align: center;
+}
+/* Active refinement tab properties */
+.gsc-tabHeader.gsc-tabhActive {
+ border: 1px solid #ccc;
+ border-bottom-color: #fff;
+ color: #202020;
+}
+/* Inactive refinement tab properties */
+.gsc-tabHeader.gsc-tabhInactive {
+ background: #fff;
+ color: #666;
+ border-left: 0;
+ border-right: 0;
+ border-top: 0;
+}
+/* Inner wrapper for an image result */
+.gsc-imageResult-column,
+.gsc-imageResult-classic {
+ padding: .25em;
+ border: 1px solid #fff;
+ margin-bottom: 1em;
+}
+/* Inner wrapper for a result */
+.gsc-webResult.gsc-result {
+ padding: .25em;
+ border: 1px solid #fff;
+ margin-bottom: 0;
+}
+/* Inner wrapper for a result */
+.cse .gsc-webResult.gsc-result {
+ border: 1px solid #fff;
+ margin-bottom: 0;
+}
+/* Wrapper for a result. */
+.gsc-webResult .gsc-result {
+ padding: 10px 0 10px 0;
+}
+/* Result hover event styling */
+.cse .gsc-webResult.gsc-result:hover,
+.gsc-webResult.gsc-result:hover,
+.gsc-webResult.gsc-result.gsc-promotion:hover,
+.gsc-results .gsc-imageResult-classic:hover,
+.gsc-results .gsc-imageResult-column:hover {
+ border: 1px solid #fff;
+}
+.gs-web-image-box,
+.gs-promotion-image-box {
+ padding: 2px 0;
+}
+.gs-promotion-image-box img.gs-promotion-image {
+ max-width: 50px;
+}
+.gs-promotion-image-box img.gs-promotion-image,
+.gs-promotion-image-box {
+ width: 50px;
+}
+.gs-web-image-box img.gs-image {
+ max-width: 70px;
+ max-height: 70px;
+}
+
+.gs-web-image-box-landscape img.gs-image {
+ max-width: 70px;
+ max-height: 50px;
+}
+
+.gs-web-image-box-portrait img.gs-image {
+ max-width: 50px;
+ max-height: 120px;
+}
+
+.gs-image-box.gs-web-image-box.gs-web-image-box-landscape {
+ width: 80px;
+}
+
+.gs-image-box.gs-web-image-box.gs-web-image-box-portrait {
+ width: 60px;
+ height: 50px;
+ overflow: hidden;
+}
+
+.gs-web-image-box {
+ text-align: inherit;
+}
+.gs-promotion-image-box img.gs-promotion-image {
+ border: 1px solid #ebebeb;
+}
+/*Promotion Settings*/
+/* The entire promo */
+.cse .gsc-webResult.gsc-result.gsc-promotion,
+.gsc-webResult.gsc-result.gsc-promotion {
+ background-color: #F6F6F6;
+ margin-top: 5px;
+ margin-bottom: 10px;
+}
+.gsc-result-info {
+ margin-top: 0;
+ margin-bottom: 0;
+ padding: 8px;
+ padding-bottom: 10px;
+}
+.gs-promotion-text-cell .gs-visibleUrl,
+.gs-promotion-text-cell .gs-snippet {
+ font-size: 13px;
+}
+
+.gsc-table-result,
+.gsc-thumbnail-inside,
+.gsc-url-top {
+ padding-left: 8px;
+ padding-right: 8px;
+}
+
+.gs-promotion-table {
+ margin-left: 8px;
+ margin-right: 8px;
+}
+
+.gs-promotion table {
+ padding-left: 8px;
+ padding-right: 8px;
+}
+
+table.gs-promotion-table-snippet-with-image{
+ padding-left: 0;
+ padding-right: 0;
+}
+
+.gs-promotion-text-cell {
+ margin-left: 8px;
+ margin-right: 8px;
+}
+
+.gs-promotion-text-cell-with-image {
+ padding-left: 10px;
+ padding-right: 10px;
+ vertical-align: top;
+}
+
+/* Promotion links */
+.cse .gs-promotion a.gs-title:link,
+.gs-promotion a.gs-title:link,
+.cse .gs-promotion a.gs-title:link *,
+.gs-promotion a.gs-title:link *,
+.cse .gs-promotion .gs-snippet a:link,
+.gs-promotion .gs-snippet a:link {
+ color: #15C;
+}
+.cse .gs-promotion a.gs-title:visited,
+.gs-promotion a.gs-title:visited,
+.cse .gs-promotion a.gs-title:visited *,
+.gs-promotion a.gs-title:visited *,
+.cse .gs-promotion .gs-snippet a:visited,
+.gs-promotion .gs-snippet a:visited {
+ color: #15C;
+}
+.cse .gs-promotion a.gs-title:hover,
+.gs-promotion a.gs-title:hover,
+.cse .gs-promotion a.gs-title:hover *,
+.gs-promotion a.gs-title:hover *,
+.cse .gs-promotion .gs-snippet a:hover,
+.gs-promotion .gs-snippet a:hover {
+ color: #15C;
+}
+.cse .gs-promotion a.gs-title:active,
+.gs-promotion a.gs-title:active,
+.cse .gs-promotion a.gs-title:active *,
+.gs-promotion a.gs-title:active *,
+.cse .gs-promotion .gs-snippet a:active,
+.gs-promotion .gs-snippet a:active {
+ color: #15C;
+}
+/* Promotion snippet */
+.cse .gs-promotion .gs-snippet,
+.gs-promotion .gs-snippet,
+.cse .gs-promotion .gs-title .gs-promotion-title-right,
+.gs-promotion .gs-title .gs-promotion-title-right,
+.cse .gs-promotion .gs-title .gs-promotion-title-right *,
+.gs-promotion .gs-title .gs-promotion-title-right * {
+ color: #000;
+}
+/* Promotion url */
+.cse .gs-promotion .gs-visibleUrl,
+.gs-promotion .gs-visibleUrl {
+ color: #093;
+}
+/* Style for auto-completion table
+ * .gsc-completion-selected : styling for a suggested query which the user has moused-over
+ * .gsc-completion-container : styling for the table which contains the completions
+ */
+.gsc-completion-selected {
+ background: #EEE;
+}
+
+.gsc-completion-container {
+ font-family: Arial, sans-serif;
+ font-size: 16px;
+ background: white;
+ border: 1px solid #CCC;
+ border-top-color: #D9D9D9;
+ margin: 0;
+}
+
+.gsc-completion-title {
+ color: #15C;
+}
+.gsc-completion-snippet {
+ color: #000;
+}
+
+/* Full URL */
+.gs-webResult div.gs-visibleUrl-short,
+.gs-promotion div.gs-visibleUrl-short {
+ display: none;
+}
+.gs-webResult div.gs-visibleUrl-long,
+.gs-promotion div.gs-visibleUrl-long {
+ display: block;
+}
+
+/* Keneddy shows url at the top of the snippet, after title */
+.gsc-url-top {
+ display: block;
+}
+
+.gsc-url-bottom {
+ display: none;
+}
+
+/* Keneddy shows thumbnail inside the snippet, under title and url */
+.gsc-thumbnail-left {
+ display: none;
+}
+
+.gsc-thumbnail-inside {
+ display: block;
+}
+
+.gsc-result .gs-title {
+ height: 1.2em;
+}
+
+.gs-result .gs-title,
+.gs-result .gs-title * {
+ color: #15C;
+}
+
+.gs-result a.gs-visibleUrl,
+.gs-result .gs-visibleUrl {
+ color: #093;
+ text-decoration: none;
+ padding-bottom: 2px;
+}
+
+.gsc-results .gsc-cursor-box {
+ margin: 10px;
+}
+
+.gsc-results .gsc-cursor-box .gsc-cursor-page {
+ text-decoration: none;
+}
+
+.gsc-results .gsc-cursor-box .gsc-cursor-page:hover {
+ text-decoration: underline;
+}
+
+.gsc-results .gsc-cursor-box .gsc-cursor-current-page {
+ text-decoration: none;
+ color: #DD4B39;
+}
+
+.gsc-preview-reviews,
+.gsc-control-cse .gs-snippet,
+.gsc-control-cse .gs-promotion em,
+.gsc-control-cse .gs-snippet,
+.gsc-control-cse .gs-promotion em {
+ color: #333;
+}
+
+.gsc-control-cse-zh_CN .gs-snippet b,
+.gsc-control-cse-zh_CN .gs-promotion em,
+.gsc-control-cse-zh_TW .gs-snippet b,
+.gsc-control-cse-zh_TW .gs-promotion em {
+ color: #C03;
+}
+
+.gsc-snippet-metadata,
+.gsc-role,
+.gsc-tel,
+.gsc-org,
+.gsc-location,
+.gsc-reviewer,
+.gsc-author {
+ color: #666;
+}
+
+.gsc-wrapper.gsc-thinWrapper {
+ border-right: 1px solid #e9e9e9;
+}
+
+.gs-spelling a {
+ color: #15C;
+}
+
+.gs-spelling {
+ color: #333;
+ padding-left: 7px;
+ padding-right: 7px;
+}
+
+.gs-snippet {
+ margin-top: 1px;
+}
+
+div.gsc-clear-button {
+ background-image: url('//www.google.com/uds/css/v2/clear.png');
+}
+
+div.gsc-clear-button:hover {
+ background-image: url('//www.google.com/uds/css/v2/clear-hover.png');
+}
+
+.gsc-preview-reviews ul {
+ padding-left: 0;
+ padding-right: 0;
+}
+
+.gsc-completion-container .gsc-completion-icon-cell {
+ width: 42px;
+ height: 42px;
+ padding-right: 10px;
+}
+
+td.gsc-branding-text, td.gcsc-branding-text {
+ color: #666;
+}
+
+.gcsc-branding {
+ padding-top: 4px;
+ padding-left: 8px;
+ padding-right: 8px;
+}
+
+.gsc-adBlock {
+ padding-bottom: 5px;
+}
+
+.gsc-table-cell-snippet-close,
+.gsc-table-cell-snippet-open {
+ padding-left: 0;
+ padding-right: 0;
+}
+
+.gsc-selected-option-container {
+ background-color: whiteSmoke;
+ background-image: linear-gradient(top,whiteSmoke,#F1F1F1);
+ background-image: -webkit-linear-gradient(top,whiteSmoke,#F1F1F1);
+ background-image: -moz-linear-gradient(top,whiteSmoke,#F1F1F1);
+ background-image: -ms-linear-gradient(top,whiteSmoke,#F1F1F1);
+ background-image: -o-linear-gradient(top,whiteSmoke,#F1F1F1);
+}
+
+
diff --git a/doc/html/team.shtml b/doc/html/team.shtml
index 0554bf4..1c56e79 100644
--- a/doc/html/team.shtml
+++ b/doc/html/team.shtml
@@ -2,13 +2,13 @@
<h1>SLURM Team</h1>
<p>SLURM development has been a joint effort of many companies and
-organizations. The current SLURM development staff includes: </p>
+organizations. Lead SLURM developers are:
<ul>
-<li>Danny Auble (SchedMD)</li>
-<li>Morris Jette (SchedMD)</li>
+<li>Danny Auble (SchedMD, formerly with Lawrence Livermore National Laboratory)</li>
+<li>Morris Jette (SchedMD, formerly with Lawrence Livermore National Laboratory)</li>
</ul>
-<p> SLURM contributers include: </p>
+<p>SLURM contributors include:</p>
<ul>
<li>Ramiro Alba (Centre Tecnològic de Tranferència de Calor, Spain)</li>
<li>Amjad Majid Ali (Colorado State University)</li>
@@ -18,60 +18,65 @@
<li>Susanne Balle (HP)</li>
<li>Ralph Bean (Rochester Institute of Technology)</li>
<li>Alexander Bersenev (Institute of Mathematics and Mechanics, Russia)</li>
+<li>Nicolas Bigaouette</li>
<li>Anton Blanchard (Samba)</li>
<li>Janne Blomqvist (Aalto University, Finland)</li>
-<li>David Bremer (LLNL)</li>
-<li>Jon Bringhurst (LANL)</li>
+<li>David Bremer (Lawrence Livermore National Laboratory)</li>
+<li>Jon Bringhurst (Los Alamos National Laboratory)</li>
<li>Bill Brophy (Bull)</li>
<li>Hongjia Cao (National University of Defense Techonogy, China)</li>
<li>Daniel Christians (HP)</li>
<li>Gilles Civario (Bull)</li>
<li>Chuck Clouston (Bull)</li>
+<li>Francois Diakhate (CEA, France)</li>
<li>Phil Eckert (Lawrence Livermore National Laboratory)</li>
<li>Yuri D'Elia (Center for Biomedicine, EURAC Research, Italy)</li>
<li>Carles Fenoy (Barcelona Supercomputer Center, Spain)</li>
-<li>Joseph Donaghy (LLNL)</li>
-<li>Chris Dunlap (LLNL)</li>
-<li>Joey Ekstrom (LLNL/Bringham Young University)</li>
+<li>Joseph Donaghy (Lawrence Livermore National Laboratory)</li>
+<li>Chris Dunlap (Lawrence Livermore National Laboratory)</li>
+<li>Joey Ekstrom (Lawrence Livermore National Laboratory/Brigham Young University)</li>
<li>Josh England (TGS Management Corporation)</li>
<li>Kent Engström (National Supercomputer Centre, Sweden)</li>
-<li>Jim Garlick (LLNL)</li>
+<li>Jim Garlick (Lawrence Livermore National Laboratory)</li>
<li>Didier Gazen (Laboratoire d'Aerologie, France)</li>
<li>Raphael Geissert (Debian)</li>
<li>Yiannis Georgiou (Bull)</li>
+<li>Mark Grondona (Lawrence Livermore National Laboratory)</li>
<li>Andriy Grytsenko (Massive Solutions Limited, Ukraine)</li>
-<li>Mark Grondona (LLNL)</li>
-<li>Takao Hatazaki (HP, Japan)</li>
+<li>Takao Hatazaki (HP)</li>
<li>Matthieu Hautreux (CEA, France)</li>
<li>Chris Holmes (HP)</li>
<li>David Höppner</li>
<li>Nathan Huff (North Dakota State University)</li>
<li>David Jackson (Adaptive Computing)</li>
+<li>Alec Jensen (SchedMD)</li>
<li>Klaus Joas (University Karlsruhe, Germany)</li>
-<li>Greg Johnson (LANL)</li>
-<li>Jason King (LLNL)</li>
+<li>Greg Johnson (Los Alamos National Laboratory)</li>
+<li>Jason King (Lawrence Livermore National Laboratory)</li>
+<li>Yury Kiryanov (Intel)</li>
<li>Aaron Knister (Environmental Protection Agency, UMBC)</li>
<li>Nancy Kritkausky (Bull)</li>
<li>Roman Kurakin (Institute of Natural Science and Ecology, Russia)</li>
+<li>Sam Lang</li>
<li>Puenlap Lee (Bull)</li>
<li>Dennis Leepow</li>
<li>Bernard Li (Genome Sciences Centre, Canada)</li>
<li>Eric Lin (Bull)</li>
-<li>Donald Lipari (LLNL)</li>
+<li>Donald Lipari (Lawrence Livermore National Laboratory)</li>
+<li>Komoto Masahiro</li>
<li>Steven McDougall (SiCortex)</li>
-<li>Donna Mecozzi (LLNL)</li>
+<li>Donna Mecozzi (Lawrence Livermore National Laboratory)</li>
<li>Bjørn-Helge Mevik (University of Oslo, Norway)</li>
-<li>Chris Morrone (LLNL)</li>
+<li>Chris Morrone (Lawrence Livermore National Laboratory)</li>
<li>Pere Munt (Barcelona Supercomputer Center, Spain)</li>
<li>Mark Nelson (IBM)</li>
<li>Michal Novotny (Masaryk University, Czech Republic)</li>
<li>Bryan O'Sullivan (Pathscale)</li>
-<li>Gennaro Oliva (Institute of High Performance Computing and
- Networking, Italy)</li>
+<li>Gennaro Oliva (Institute of High Performance Computing and Networking, Italy)</li>
<li>Rémi Palancher</li>
<li>Alejandro Lucero Palau (Barcelona Supercomputer Center, Spain)</li>
<li>Daniel Palermo (HP)</li>
-<li>Dan Phung (LLNL/Columbia University)</li>
+<li>Dan Phung (Lawrence Livermore National Laboratory/Columbia University)</li>
<li>Ashley Pittman (Quadrics, UK)</li>
<li>Vijay Ramasubramanian (University of Maryland)</li>
<li>Krishnakumar Ravi[KK] (HP)</li>
@@ -86,22 +91,26 @@
<li>Dan Rusak (Bull)</li>
<li>Eygene Ryabinkin (Kurchatov Institute, Russia)</li>
<li>Federico Sacerdoti (D.E. Shaw)</li>
+<li>Aleksej Saushev</li>
<li>Rod Schultz (Bull)</li>
<li>Tyler Strickland (University of Florida)</li>
<li>Jeff Squyres (LAM MPI)</li>
<li>Prashanth Tamraparni (HP, India)</li>
<li>Jimmy Tang (Trinity College, Ireland)</li>
-<li>Kevin Tew (LLNL/Bringham Young University)</li>
+<li>Kevin Tew (Lawrence Livermore National Laboratory/Brigham Young University)</li>
<li>John Thiltges (University of Nebraska-Lincoln)</li>
<li>Adam Todorski (Rensselaer Polytechnic Institute)</li>
<li>Stephen Trofinoff (Swiss National Supercomputing Centre)</li>
<li>Nathan Weeks (Iowa State University)</li>
+<li>Andy Wettstein (University of Chicago)</li>
<li>Tim Wickberg (Rensselaer Polytechnic Institute)</li>
<li>Ramiro Brito Willmersdorf (Universidade Federal de Pemambuco, Brazil)</li>
<li>Jay Windley (Linux NetworX)</li>
<li>Anne-Marie Wunderlin (Bull)</li>
+<li>Nathan Yee (SchedMD)</li>
+<li>Damien François (UCL)</li>
</ul>
-<p style="text-align:center;">Last modified 24 January 2012</p>
+<p style="text-align:center;">Last modified 19 July 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/topology.shtml b/doc/html/topology.shtml
index bb2241f..4b36405 100644
--- a/doc/html/topology.shtml
+++ b/doc/html/topology.shtml
@@ -14,7 +14,7 @@
For larger jobs, this minimizes the number of sets of consecutive nodes
allocated to the job.</p>
-<a name="topo_3d"><h2>Three-dimension Topology</h2>
+<a name="topo_3d"><h2>Three-dimension Topology</h2></a>
<p>Some larger computers rely upon a three-dimensional torus interconnect.
The IBM BlueGene computers is one example of this which has highly
@@ -132,6 +132,6 @@
SwitchName=s4 Switches=s[0-3]
</pre>
-<p style="text-align:center;">Last modified 8 July 2011</p>
+<p style="text-align:center;">Last modified 27 March 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/html/troubleshoot.shtml b/doc/html/troubleshoot.shtml
index 676d792..6d1647b 100644
--- a/doc/html/troubleshoot.shtml
+++ b/doc/html/troubleshoot.shtml
@@ -54,7 +54,7 @@
You should check the log file (<i>SlurmctldLog</i> in the
<i>slurm.conf</i> file) for an indication of why it failed.
If it keeps failing, you should contact the slurm team for help at
-<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</li>
+<a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a>.</li>
<li>If slurmctld is running but not responding (a very rare situation),
then kill and restart it (typically as user root using the commands
@@ -65,7 +65,7 @@
and restart.
Again check the log file for an indication of why it failed.
At this point, you should contact the slurm team for help at
-<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</li>
+<a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a>.</li>
<li>If it continues to fail without an indication as to the failure
mode, restart without preserving state (typically as user root
@@ -165,7 +165,7 @@
Check the value of "Last slurmctld msg time" to determine
if the slurmctld is able to communicate with the slurmd.
If it keeps failing, you should contact the slurm team for help at
-<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</li>
+<a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a>.</li>
<li>If slurmd is running but not responding (a very rare situation),
then kill and restart it (typically as user root using the commands
@@ -179,7 +179,7 @@
and restart.
Again check the log file for an indication of why it failed.
At this point, you should contact the slurm team for help at
-<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</li>
+<a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a>.</li>
<li>If still not responding without an indication as to the failure
mode, restart without preserving state (typically as user root
@@ -296,6 +296,6 @@
</ol>
<p class="footer"><a href="#top">top</a></p>
-<p style="text-align:center;">Last modified 7 July 2007</p>
+<p style="text-align:center;">Last modified 3 February 2012</p>
<!--#include virtual="footer.txt"-->
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index b1e23bc..5a3ddf7 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -1,175 +1,3 @@
-EXTRA_DIST = man1 man3 man5 man8
-
-man1_MANS = \
- man1/sacct.1 \
- man1/sacctmgr.1 \
- man1/salloc.1 \
- man1/sattach.1 \
- man1/sbatch.1 \
- man1/sbcast.1 \
- man1/scancel.1 \
- man1/scontrol.1 \
- man1/sinfo.1 \
- man1/slurm.1 \
- man1/smap.1 \
- man1/sprio.1 \
- man1/squeue.1 \
- man1/sreport.1 \
- man1/srun.1 \
- man1/srun_cr.1 \
- man1/sshare.1 \
- man1/sstat.1 \
- man1/strigger.1 \
- man1/sview.1
-
-man3_MANS = man3/slurm_hostlist_create.3 \
- man3/slurm_hostlist_destroy.3 \
- man3/slurm_hostlist_shift.3 \
- man3/slurm_allocate_resources.3 \
- man3/slurm_allocate_resources_blocking.3 \
- man3/slurm_allocation_lookup.3 \
- man3/slurm_allocation_lookup_lite.3 \
- man3/slurm_allocation_msg_thr_create.3 \
- man3/slurm_allocation_msg_thr_destroy.3 \
- man3/slurm_api_version.3 \
- man3/slurm_checkpoint.3 \
- man3/slurm_checkpoint_able.3 \
- man3/slurm_checkpoint_complete.3 \
- man3/slurm_checkpoint_create.3 \
- man3/slurm_checkpoint_disable.3 \
- man3/slurm_checkpoint_enable.3 \
- man3/slurm_checkpoint_error.3 \
- man3/slurm_checkpoint_failed.3 \
- man3/slurm_checkpoint_restart.3 \
- man3/slurm_checkpoint_task_complete.3 \
- man3/slurm_checkpoint_tasks.3 \
- man3/slurm_checkpoint_vacate.3 \
- man3/slurm_clear_trigger.3 \
- man3/slurm_complete_job.3 \
- man3/slurm_confirm_allocation.3 \
- man3/slurm_create_partition.3 \
- man3/slurm_create_reservation.3 \
- man3/slurm_delete_partition.3 \
- man3/slurm_delete_reservation.3 \
- man3/slurm_free_ctl_conf.3 \
- man3/slurm_free_front_end_info_msg.3 \
- man3/slurm_free_job_info_msg.3 \
- man3/slurm_free_job_alloc_info_response_msg.3 \
- man3/slurm_free_job_step_create_response_msg.3 \
- man3/slurm_free_job_step_info_response_msg.3 \
- man3/slurm_free_node_info.3 \
- man3/slurm_free_node_info_msg.3 \
- man3/slurm_free_partition_info.3 \
- man3/slurm_free_partition_info_msg.3 \
- man3/slurm_free_reservation_info_msg.3 \
- man3/slurm_free_resource_allocation_response_msg.3 \
- man3/slurm_free_slurmd_status.3 \
- man3/slurm_free_submit_response_response_msg.3 \
- man3/slurm_free_trigger_msg.3 \
- man3/slurm_get_end_time.3 \
- man3/slurm_get_errno.3 \
- man3/slurm_get_job_steps.3 \
- man3/slurm_get_rem_time.3 \
- man3/slurm_get_select_jobinfo.3 \
- man3/slurm_get_triggers.3 \
- man3/slurm_init_update_front_end_msg.3 \
- man3/slurm_init_job_desc_msg.3 \
- man3/slurm_init_part_desc_msg.3 \
- man3/slurm_init_resv_desc_msg.3 \
- man3/slurm_init_update_node_msg.3 \
- man3/slurm_init_update_step_msg.3 \
- man3/slurm_job_cpus_allocated_on_node.3 \
- man3/slurm_job_cpus_allocated_on_node_id.3 \
- man3/slurm_job_step_create.3 \
- man3/slurm_job_step_launch_t_init.3 \
- man3/slurm_job_step_layout_get.3 \
- man3/slurm_job_step_layout_free.3 \
- man3/slurm_job_will_run.3 \
- man3/slurm_jobinfo_ctx_get.3 \
- man3/slurm_kill_job.3 \
- man3/slurm_kill_job_step.3 \
- man3/slurm_load_ctl_conf.3 \
- man3/slurm_load_front_end.3 \
- man3/slurm_load_job.3 \
- man3/slurm_load_jobs.3 \
- man3/slurm_load_node.3 \
- man3/slurm_load_partitions.3 \
- man3/slurm_load_reservations.3 \
- man3/slurm_load_slurmd_status.3 \
- man3/slurm_notify_job.3 \
- man3/slurm_perror.3 \
- man3/slurm_pid2jobid.3 \
- man3/slurm_ping.3 \
- man3/slurm_print_ctl_conf.3 \
- man3/slurm_print_front_end_info_msg.3 \
- man3/slurm_print_front_end_table.3 \
- man3/slurm_print_job_info.3 \
- man3/slurm_print_job_info_msg.3 \
- man3/slurm_print_job_step_info.3 \
- man3/slurm_print_job_step_info_msg.3 \
- man3/slurm_print_node_info_msg.3 \
- man3/slurm_print_node_table.3 \
- man3/slurm_print_partition_info.3 \
- man3/slurm_print_partition_info_msg.3 \
- man3/slurm_print_reservation_info.3 \
- man3/slurm_print_reservation_info_msg.3 \
- man3/slurm_print_slurmd_status.3 \
- man3/slurm_read_hostfile.3 \
- man3/slurm_reconfigure.3 \
- man3/slurm_resume.3 \
- man3/slurm_requeue.3 \
- man3/slurm_set_debug_level.3 \
- man3/slurm_set_trigger.3 \
- man3/slurm_shutdown.3 \
- man3/slurm_signal_job.3 \
- man3/slurm_signal_job_step.3 \
- man3/slurm_slurmd_status.3 \
- man3/slurm_sprint_front_end_table.3 \
- man3/slurm_sprint_job_info.3 \
- man3/slurm_sprint_job_step_info.3 \
- man3/slurm_sprint_node_table.3 \
- man3/slurm_sprint_partition_info.3 \
- man3/slurm_sprint_reservation_info.3 \
- man3/slurm_step_ctx_create.3 \
- man3/slurm_step_ctx_create_no_alloc.3 \
- man3/slurm_step_ctx_daemon_per_node_hack.3 \
- man3/slurm_step_ctx_destroy.3 \
- man3/slurm_step_ctx_params_t_init.3 \
- man3/slurm_step_ctx_get.3 \
- man3/slurm_step_launch.3 \
- man3/slurm_step_launch_fwd_signal.3 \
- man3/slurm_step_launch_abort.3 \
- man3/slurm_step_launch_wait_finish.3 \
- man3/slurm_step_launch_wait_start.3 \
- man3/slurm_strerror.3 \
- man3/slurm_submit_batch_job.3 \
- man3/slurm_suspend.3 \
- man3/slurm_takeover.3 \
- man3/slurm_terminate_job.3 \
- man3/slurm_terminate_job_step.3 \
- man3/slurm_update_front_end.3 \
- man3/slurm_update_job.3 \
- man3/slurm_update_node.3 \
- man3/slurm_update_partition.3 \
- man3/slurm_update_reservation.3 \
- man3/slurm_update_step.3
-
-man5_MANS = man5/bluegene.conf.5 \
- man5/cgroup.conf.5 \
- man5/cray.conf.5 \
- man5/gres.conf.5 \
- man5/slurm.conf.5 \
- man5/slurmdbd.conf.5 \
- man5/topology.conf.5 \
- man5/wiki.conf.5
-
-man8_MANS = man8/slurmctld.8 \
- man8/slurmd.8 \
- man8/slurmdbd.8 \
- man8/slurmstepd.8 \
- man8/spank.8
-
-dist-hook:
- -rm -rf `find $(distdir) -name CVS`
+SUBDIRS = man1 man3 man5 man8
diff --git a/doc/man/Makefile.in b/doc/man/Makefile.in
index 2c59e66..a288167 100644
--- a/doc/man/Makefile.in
+++ b/doc/man/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -83,36 +83,47 @@
CONFIG_CLEAN_VPATH_FILES =
SOURCES =
DIST_SOURCES =
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
- srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
- for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
- for p in $$list; do echo "$$p $$p"; done | \
- sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
- $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
- if (++n[$$2] == $(am__install_max)) \
- { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
- END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
- sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
- sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-man1dir = $(mandir)/man1
-am__installdirs = "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" \
- "$(DESTDIR)$(man5dir)" "$(DESTDIR)$(man8dir)"
-man3dir = $(mandir)/man3
-man5dir = $(mandir)/man5
-man8dir = $(mandir)/man8
-NROFF = nroff
-MANS = $(man1_MANS) $(man3_MANS) $(man5_MANS) $(man8_MANS)
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+ html-recursive info-recursive install-data-recursive \
+ install-dvi-recursive install-exec-recursive \
+ install-html-recursive install-info-recursive \
+ install-pdf-recursive install-ps-recursive install-recursive \
+ installcheck-recursive installdirs-recursive pdf-recursive \
+ ps-recursive uninstall-recursive
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
+ distclean-recursive maintainer-clean-recursive
+AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
+ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
+ distdir
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+ dir0=`pwd`; \
+ sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+ sed_rest='s,^[^/]*/*,,'; \
+ sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+ sed_butlast='s,/*[^/]*$$,,'; \
+ while test -n "$$dir1"; do \
+ first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+ if test "$$first" != "."; then \
+ if test "$$first" = ".."; then \
+ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+ else \
+ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+ if test "$$first2" = "$$first"; then \
+ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+ else \
+ dir2="../$$dir2"; \
+ fi; \
+ dir0="$$dir0"/"$$first"; \
+ fi; \
+ fi; \
+ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+ done; \
+ reldir="$$dir2"
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AR = @AR@
@@ -190,6 +201,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -226,6 +238,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -313,177 +326,8 @@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-EXTRA_DIST = man1 man3 man5 man8
-man1_MANS = \
- man1/sacct.1 \
- man1/sacctmgr.1 \
- man1/salloc.1 \
- man1/sattach.1 \
- man1/sbatch.1 \
- man1/sbcast.1 \
- man1/scancel.1 \
- man1/scontrol.1 \
- man1/sinfo.1 \
- man1/slurm.1 \
- man1/smap.1 \
- man1/sprio.1 \
- man1/squeue.1 \
- man1/sreport.1 \
- man1/srun.1 \
- man1/srun_cr.1 \
- man1/sshare.1 \
- man1/sstat.1 \
- man1/strigger.1 \
- man1/sview.1
-
-man3_MANS = man3/slurm_hostlist_create.3 \
- man3/slurm_hostlist_destroy.3 \
- man3/slurm_hostlist_shift.3 \
- man3/slurm_allocate_resources.3 \
- man3/slurm_allocate_resources_blocking.3 \
- man3/slurm_allocation_lookup.3 \
- man3/slurm_allocation_lookup_lite.3 \
- man3/slurm_allocation_msg_thr_create.3 \
- man3/slurm_allocation_msg_thr_destroy.3 \
- man3/slurm_api_version.3 \
- man3/slurm_checkpoint.3 \
- man3/slurm_checkpoint_able.3 \
- man3/slurm_checkpoint_complete.3 \
- man3/slurm_checkpoint_create.3 \
- man3/slurm_checkpoint_disable.3 \
- man3/slurm_checkpoint_enable.3 \
- man3/slurm_checkpoint_error.3 \
- man3/slurm_checkpoint_failed.3 \
- man3/slurm_checkpoint_restart.3 \
- man3/slurm_checkpoint_task_complete.3 \
- man3/slurm_checkpoint_tasks.3 \
- man3/slurm_checkpoint_vacate.3 \
- man3/slurm_clear_trigger.3 \
- man3/slurm_complete_job.3 \
- man3/slurm_confirm_allocation.3 \
- man3/slurm_create_partition.3 \
- man3/slurm_create_reservation.3 \
- man3/slurm_delete_partition.3 \
- man3/slurm_delete_reservation.3 \
- man3/slurm_free_ctl_conf.3 \
- man3/slurm_free_front_end_info_msg.3 \
- man3/slurm_free_job_info_msg.3 \
- man3/slurm_free_job_alloc_info_response_msg.3 \
- man3/slurm_free_job_step_create_response_msg.3 \
- man3/slurm_free_job_step_info_response_msg.3 \
- man3/slurm_free_node_info.3 \
- man3/slurm_free_node_info_msg.3 \
- man3/slurm_free_partition_info.3 \
- man3/slurm_free_partition_info_msg.3 \
- man3/slurm_free_reservation_info_msg.3 \
- man3/slurm_free_resource_allocation_response_msg.3 \
- man3/slurm_free_slurmd_status.3 \
- man3/slurm_free_submit_response_response_msg.3 \
- man3/slurm_free_trigger_msg.3 \
- man3/slurm_get_end_time.3 \
- man3/slurm_get_errno.3 \
- man3/slurm_get_job_steps.3 \
- man3/slurm_get_rem_time.3 \
- man3/slurm_get_select_jobinfo.3 \
- man3/slurm_get_triggers.3 \
- man3/slurm_init_update_front_end_msg.3 \
- man3/slurm_init_job_desc_msg.3 \
- man3/slurm_init_part_desc_msg.3 \
- man3/slurm_init_resv_desc_msg.3 \
- man3/slurm_init_update_node_msg.3 \
- man3/slurm_init_update_step_msg.3 \
- man3/slurm_job_cpus_allocated_on_node.3 \
- man3/slurm_job_cpus_allocated_on_node_id.3 \
- man3/slurm_job_step_create.3 \
- man3/slurm_job_step_launch_t_init.3 \
- man3/slurm_job_step_layout_get.3 \
- man3/slurm_job_step_layout_free.3 \
- man3/slurm_job_will_run.3 \
- man3/slurm_jobinfo_ctx_get.3 \
- man3/slurm_kill_job.3 \
- man3/slurm_kill_job_step.3 \
- man3/slurm_load_ctl_conf.3 \
- man3/slurm_load_front_end.3 \
- man3/slurm_load_job.3 \
- man3/slurm_load_jobs.3 \
- man3/slurm_load_node.3 \
- man3/slurm_load_partitions.3 \
- man3/slurm_load_reservations.3 \
- man3/slurm_load_slurmd_status.3 \
- man3/slurm_notify_job.3 \
- man3/slurm_perror.3 \
- man3/slurm_pid2jobid.3 \
- man3/slurm_ping.3 \
- man3/slurm_print_ctl_conf.3 \
- man3/slurm_print_front_end_info_msg.3 \
- man3/slurm_print_front_end_table.3 \
- man3/slurm_print_job_info.3 \
- man3/slurm_print_job_info_msg.3 \
- man3/slurm_print_job_step_info.3 \
- man3/slurm_print_job_step_info_msg.3 \
- man3/slurm_print_node_info_msg.3 \
- man3/slurm_print_node_table.3 \
- man3/slurm_print_partition_info.3 \
- man3/slurm_print_partition_info_msg.3 \
- man3/slurm_print_reservation_info.3 \
- man3/slurm_print_reservation_info_msg.3 \
- man3/slurm_print_slurmd_status.3 \
- man3/slurm_read_hostfile.3 \
- man3/slurm_reconfigure.3 \
- man3/slurm_resume.3 \
- man3/slurm_requeue.3 \
- man3/slurm_set_debug_level.3 \
- man3/slurm_set_trigger.3 \
- man3/slurm_shutdown.3 \
- man3/slurm_signal_job.3 \
- man3/slurm_signal_job_step.3 \
- man3/slurm_slurmd_status.3 \
- man3/slurm_sprint_front_end_table.3 \
- man3/slurm_sprint_job_info.3 \
- man3/slurm_sprint_job_step_info.3 \
- man3/slurm_sprint_node_table.3 \
- man3/slurm_sprint_partition_info.3 \
- man3/slurm_sprint_reservation_info.3 \
- man3/slurm_step_ctx_create.3 \
- man3/slurm_step_ctx_create_no_alloc.3 \
- man3/slurm_step_ctx_daemon_per_node_hack.3 \
- man3/slurm_step_ctx_destroy.3 \
- man3/slurm_step_ctx_params_t_init.3 \
- man3/slurm_step_ctx_get.3 \
- man3/slurm_step_launch.3 \
- man3/slurm_step_launch_fwd_signal.3 \
- man3/slurm_step_launch_abort.3 \
- man3/slurm_step_launch_wait_finish.3 \
- man3/slurm_step_launch_wait_start.3 \
- man3/slurm_strerror.3 \
- man3/slurm_submit_batch_job.3 \
- man3/slurm_suspend.3 \
- man3/slurm_takeover.3 \
- man3/slurm_terminate_job.3 \
- man3/slurm_terminate_job_step.3 \
- man3/slurm_update_front_end.3 \
- man3/slurm_update_job.3 \
- man3/slurm_update_node.3 \
- man3/slurm_update_partition.3 \
- man3/slurm_update_reservation.3 \
- man3/slurm_update_step.3
-
-man5_MANS = man5/bluegene.conf.5 \
- man5/cgroup.conf.5 \
- man5/cray.conf.5 \
- man5/gres.conf.5 \
- man5/slurm.conf.5 \
- man5/slurmdbd.conf.5 \
- man5/topology.conf.5 \
- man5/wiki.conf.5
-
-man8_MANS = man8/slurmctld.8 \
- man8/slurmd.8 \
- man8/slurmdbd.8 \
- man8/slurmstepd.8 \
- man8/spank.8
-
-all: all-am
+SUBDIRS = man1 man3 man5 man8
+all: all-recursive
.SUFFIXES:
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@@ -522,163 +366,143 @@
clean-libtool:
-rm -rf .libs _libs
-install-man1: $(man1_MANS)
- @$(NORMAL_INSTALL)
- test -z "$(man1dir)" || $(MKDIR_P) "$(DESTDIR)$(man1dir)"
- @list='$(man1_MANS)'; test -n "$(man1dir)" || exit 0; \
- { for i in $$list; do echo "$$i"; done; \
- } | while read p; do \
- if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; echo "$$p"; \
- done | \
- sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
- -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
- sed 'N;N;s,\n, ,g' | { \
- list=; while read file base inst; do \
- if test "$$base" = "$$inst"; then list="$$list $$file"; else \
- echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
- $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+ @fail= failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):
+ @fail= failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ case "$@" in \
+ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+ *) list='$(SUBDIRS)' ;; \
+ esac; \
+ rev=''; for subdir in $$list; do \
+ if test "$$subdir" = "."; then :; else \
+ rev="$$subdir $$rev"; \
fi; \
done; \
- for i in $$list; do echo "$$i"; done | $(am__base_list) | \
- while read files; do \
- test -z "$$files" || { \
- echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
- $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
- done; }
-
-uninstall-man1:
- @$(NORMAL_UNINSTALL)
- @list='$(man1_MANS)'; test -n "$(man1dir)" || exit 0; \
- files=`{ for i in $$list; do echo "$$i"; done; \
- } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
- -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
- test -z "$$files" || { \
- echo " ( cd '$(DESTDIR)$(man1dir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(man1dir)" && rm -f $$files; }
-install-man3: $(man3_MANS)
- @$(NORMAL_INSTALL)
- test -z "$(man3dir)" || $(MKDIR_P) "$(DESTDIR)$(man3dir)"
- @list='$(man3_MANS)'; test -n "$(man3dir)" || exit 0; \
- { for i in $$list; do echo "$$i"; done; \
- } | while read p; do \
- if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; echo "$$p"; \
- done | \
- sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \
- -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
- sed 'N;N;s,\n, ,g' | { \
- list=; while read file base inst; do \
- if test "$$base" = "$$inst"; then list="$$list $$file"; else \
- echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man3dir)/$$inst'"; \
- $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man3dir)/$$inst" || exit $$?; \
+ rev="$$rev ."; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
fi; \
- done; \
- for i in $$list; do echo "$$i"; done | $(am__base_list) | \
- while read files; do \
- test -z "$$files" || { \
- echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man3dir)'"; \
- $(INSTALL_DATA) $$files "$(DESTDIR)$(man3dir)" || exit $$?; }; \
- done; }
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+ctags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+ done
-uninstall-man3:
- @$(NORMAL_UNINSTALL)
- @list='$(man3_MANS)'; test -n "$(man3dir)" || exit 0; \
- files=`{ for i in $$list; do echo "$$i"; done; \
- } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \
- -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
- test -z "$$files" || { \
- echo " ( cd '$(DESTDIR)$(man3dir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(man3dir)" && rm -f $$files; }
-install-man5: $(man5_MANS)
- @$(NORMAL_INSTALL)
- test -z "$(man5dir)" || $(MKDIR_P) "$(DESTDIR)$(man5dir)"
- @list='$(man5_MANS)'; test -n "$(man5dir)" || exit 0; \
- { for i in $$list; do echo "$$i"; done; \
- } | while read p; do \
- if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; echo "$$p"; \
- done | \
- sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^5][0-9a-z]*$$,5,;x' \
- -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
- sed 'N;N;s,\n, ,g' | { \
- list=; while read file base inst; do \
- if test "$$base" = "$$inst"; then list="$$list $$file"; else \
- echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man5dir)/$$inst'"; \
- $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man5dir)/$$inst" || exit $$?; \
- fi; \
- done; \
- for i in $$list; do echo "$$i"; done | $(am__base_list) | \
- while read files; do \
- test -z "$$files" || { \
- echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man5dir)'"; \
- $(INSTALL_DATA) $$files "$(DESTDIR)$(man5dir)" || exit $$?; }; \
- done; }
-
-uninstall-man5:
- @$(NORMAL_UNINSTALL)
- @list='$(man5_MANS)'; test -n "$(man5dir)" || exit 0; \
- files=`{ for i in $$list; do echo "$$i"; done; \
- } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^5][0-9a-z]*$$,5,;x' \
- -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
- test -z "$$files" || { \
- echo " ( cd '$(DESTDIR)$(man5dir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(man5dir)" && rm -f $$files; }
-install-man8: $(man8_MANS)
- @$(NORMAL_INSTALL)
- test -z "$(man8dir)" || $(MKDIR_P) "$(DESTDIR)$(man8dir)"
- @list='$(man8_MANS)'; test -n "$(man8dir)" || exit 0; \
- { for i in $$list; do echo "$$i"; done; \
- } | while read p; do \
- if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; echo "$$p"; \
- done | \
- sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^8][0-9a-z]*$$,8,;x' \
- -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
- sed 'N;N;s,\n, ,g' | { \
- list=; while read file base inst; do \
- if test "$$base" = "$$inst"; then list="$$list $$file"; else \
- echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man8dir)/$$inst'"; \
- $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man8dir)/$$inst" || exit $$?; \
- fi; \
- done; \
- for i in $$list; do echo "$$i"; done | $(am__base_list) | \
- while read files; do \
- test -z "$$files" || { \
- echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man8dir)'"; \
- $(INSTALL_DATA) $$files "$(DESTDIR)$(man8dir)" || exit $$?; }; \
- done; }
-
-uninstall-man8:
- @$(NORMAL_UNINSTALL)
- @list='$(man8_MANS)'; test -n "$(man8dir)" || exit 0; \
- files=`{ for i in $$list; do echo "$$i"; done; \
- } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^8][0-9a-z]*$$,8,;x' \
- -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
- test -z "$$files" || { \
- echo " ( cd '$(DESTDIR)$(man8dir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(man8dir)" && rm -f $$files; }
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
tags: TAGS
-TAGS:
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+ include_option=--etags-include; \
+ empty_fix=.; \
+ else \
+ include_option=--include; \
+ empty_fix=; \
+ fi; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test ! -f $$subdir/TAGS || \
+ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
ctags: CTAGS
-CTAGS:
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
- @list='$(MANS)'; if test -n "$$list"; then \
- list=`for p in $$list; do \
- if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
- if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \
- if test -n "$$list" && \
- grep 'ab help2man is required to generate this page' $$list >/dev/null; then \
- echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \
- grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \
- echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \
- echo " typically \`make maintainer-clean' will remove them" >&2; \
- exit 1; \
- else :; fi; \
- else :; fi
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
@@ -708,30 +532,58 @@
|| exit 1; \
fi; \
done
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$(top_distdir)" distdir="$(distdir)" \
- dist-hook
-check-am: all-am
-check: check-am
-all-am: Makefile $(MANS)
-installdirs:
- for dir in "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(man5dir)" "$(DESTDIR)$(man8dir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
+ fi; \
done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+ $(am__relativize); \
+ new_distdir=$$reldir; \
+ dir1=$$subdir; dir2="$(top_distdir)"; \
+ $(am__relativize); \
+ new_top_distdir=$$reldir; \
+ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+ ($(am__cd) $$subdir && \
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$$new_top_distdir" \
+ distdir="$$new_distdir" \
+ am__remove_distdir=: \
+ am__skip_length_check=: \
+ am__skip_mode_fix=: \
+ distdir) \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-installcheck: installcheck-am
+installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
@@ -743,94 +595,89 @@
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
+clean: clean-recursive
clean-am: clean-generic clean-libtool mostlyclean-am
-distclean: distclean-am
+distclean: distclean-recursive
-rm -f Makefile
-distclean-am: clean-am distclean-generic
+distclean-am: clean-am distclean-generic distclean-tags
-dvi: dvi-am
+dvi: dvi-recursive
dvi-am:
-html: html-am
+html: html-recursive
html-am:
-info: info-am
+info: info-recursive
info-am:
-install-data-am: install-man
+install-data-am:
-install-dvi: install-dvi-am
+install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am:
-install-html: install-html-am
+install-html: install-html-recursive
install-html-am:
-install-info: install-info-am
+install-info: install-info-recursive
install-info-am:
-install-man: install-man1 install-man3 install-man5 install-man8
+install-man:
-install-pdf: install-pdf-am
+install-pdf: install-pdf-recursive
install-pdf-am:
-install-ps: install-ps-am
+install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
-maintainer-clean: maintainer-clean-am
+maintainer-clean: maintainer-clean-recursive
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
-mostlyclean: mostlyclean-am
+mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-pdf: pdf-am
+pdf: pdf-recursive
pdf-am:
-ps: ps-am
+ps: ps-recursive
ps-am:
-uninstall-am: uninstall-man
+uninstall-am:
-uninstall-man: uninstall-man1 uninstall-man3 uninstall-man5 \
- uninstall-man8
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
+ install-am install-strip tags-recursive
-.MAKE: install-am install-strip
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+ all all-am check check-am clean clean-generic clean-libtool \
+ ctags ctags-recursive distclean distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs installdirs-am maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
+ uninstall uninstall-am
-.PHONY: all all-am check check-am clean clean-generic clean-libtool \
- dist-hook distclean distclean-generic distclean-libtool \
- distdir dvi dvi-am html html-am info info-am install \
- install-am install-data install-data-am install-dvi \
- install-dvi-am install-exec install-exec-am install-html \
- install-html-am install-info install-info-am install-man \
- install-man1 install-man3 install-man5 install-man8 \
- install-pdf install-pdf-am install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- uninstall uninstall-am uninstall-man uninstall-man1 \
- uninstall-man3 uninstall-man5 uninstall-man8
-
-
-dist-hook:
- -rm -rf `find $(distdir) -name CVS`
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/doc/man/man1/Makefile.am b/doc/man/man1/Makefile.am
new file mode 100644
index 0000000..8216b35
--- /dev/null
+++ b/doc/man/man1/Makefile.am
@@ -0,0 +1,61 @@
+htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html
+
+man1_MANS = \
+ sacct.1 \
+ sacctmgr.1 \
+ salloc.1 \
+ sattach.1 \
+ sbatch.1 \
+ sbcast.1 \
+ scancel.1 \
+ scontrol.1 \
+ sdiag.1 \
+ sinfo.1 \
+ slurm.1 \
+ smap.1 \
+ sprio.1 \
+ squeue.1 \
+ sreport.1 \
+ srun.1 \
+ srun_cr.1 \
+ sshare.1 \
+ sstat.1 \
+ strigger.1 \
+ sview.1
+
+EXTRA_DIST = $(man1_MANS)
+
+if HAVE_MAN2HTML
+
+html_DATA = \
+ sacct.html \
+ sacctmgr.html \
+ salloc.html \
+ sattach.html \
+ sbatch.html \
+ sbcast.html \
+ scancel.html \
+ scontrol.html \
+ sdiag.html \
+ sinfo.html \
+ smap.html \
+ sprio.html \
+ squeue.html \
+ sreport.html \
+ srun.html \
+ srun_cr.html \
+ sshare.html \
+ sstat.html \
+ strigger.html \
+ sview.html
+
+MOSTLYCLEANFILES = ${html_DATA}
+
+EXTRA_DIST += $(html_DATA)
+
+SUFFIXES = .html
+
+.1.html:
+ `dirname $<`/../man2html.py $(srcdir)/../../html/header.txt $(srcdir)/../../html/footer.txt $<
+
+endif
diff --git a/doc/man/man1/Makefile.in b/doc/man/man1/Makefile.in
new file mode 100644
index 0000000..c3f5c90
--- /dev/null
+++ b/doc/man/man1/Makefile.in
@@ -0,0 +1,638 @@
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@HAVE_MAN2HTML_TRUE@am__append_1 = $(html_DATA)
+subdir = doc/man/man1
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
+ $(top_srcdir)/auxdir/libtool.m4 \
+ $(top_srcdir)/auxdir/ltoptions.m4 \
+ $(top_srcdir)/auxdir/ltsugar.m4 \
+ $(top_srcdir)/auxdir/ltversion.m4 \
+ $(top_srcdir)/auxdir/lt~obsolete.m4 \
+ $(top_srcdir)/auxdir/slurm.m4 \
+ $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \
+ $(top_srcdir)/auxdir/x_ac_affinity.m4 \
+ $(top_srcdir)/auxdir/x_ac_aix.m4 \
+ $(top_srcdir)/auxdir/x_ac_blcr.m4 \
+ $(top_srcdir)/auxdir/x_ac_bluegene.m4 \
+ $(top_srcdir)/auxdir/x_ac_cflags.m4 \
+ $(top_srcdir)/auxdir/x_ac_cray.m4 \
+ $(top_srcdir)/auxdir/x_ac_databases.m4 \
+ $(top_srcdir)/auxdir/x_ac_debug.m4 \
+ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \
+ $(top_srcdir)/auxdir/x_ac_elan.m4 \
+ $(top_srcdir)/auxdir/x_ac_env.m4 \
+ $(top_srcdir)/auxdir/x_ac_federation.m4 \
+ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \
+ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \
+ $(top_srcdir)/auxdir/x_ac_iso.m4 \
+ $(top_srcdir)/auxdir/x_ac_lua.m4 \
+ $(top_srcdir)/auxdir/x_ac_man2html.m4 \
+ $(top_srcdir)/auxdir/x_ac_munge.m4 \
+ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \
+ $(top_srcdir)/auxdir/x_ac_pam.m4 \
+ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \
+ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \
+ $(top_srcdir)/auxdir/x_ac_readline.m4 \
+ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \
+ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
+ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \
+ $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \
+ $(top_srcdir)/auxdir/x_ac_srun.m4 \
+ $(top_srcdir)/auxdir/x_ac_sun_const.m4 \
+ $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+man1dir = $(mandir)/man1
+am__installdirs = "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(htmldir)"
+NROFF = nroff
+MANS = $(man1_MANS)
+DATA = $(html_DATA)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTHD_CFLAGS = @AUTHD_CFLAGS@
+AUTHD_LIBS = @AUTHD_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BGL_LOADED = @BGL_LOADED@
+BGQ_LOADED = @BGQ_LOADED@
+BG_INCLUDES = @BG_INCLUDES@
+BG_LDFLAGS = @BG_LDFLAGS@
+BG_L_P_LOADED = @BG_L_P_LOADED@
+BLCR_CPPFLAGS = @BLCR_CPPFLAGS@
+BLCR_HOME = @BLCR_HOME@
+BLCR_LDFLAGS = @BLCR_LDFLAGS@
+BLCR_LIBS = @BLCR_LIBS@
+BLUEGENE_LOADED = @BLUEGENE_LOADED@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CMD_LDFLAGS = @CMD_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DL_LIBS = @DL_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ELAN_LIBS = @ELAN_LIBS@
+EXEEXT = @EXEEXT@
+FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@
+FGREP = @FGREP@
+GREP = @GREP@
+GTK_CFLAGS = @GTK_CFLAGS@
+GTK_LIBS = @GTK_LIBS@
+HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@
+HAVEPGCONFIG = @HAVEPGCONFIG@
+HAVE_AIX = @HAVE_AIX@
+HAVE_ELAN = @HAVE_ELAN@
+HAVE_FEDERATION = @HAVE_FEDERATION@
+HAVE_MAN2HTML = @HAVE_MAN2HTML@
+HAVE_OPENSSL = @HAVE_OPENSSL@
+HAVE_SOME_CURSES = @HAVE_SOME_CURSES@
+HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@
+HWLOC_LDFLAGS = @HWLOC_LDFLAGS@
+HWLOC_LIBS = @HWLOC_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_LDFLAGS = @LIB_LDFLAGS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
+MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
+MUNGE_LIBS = @MUNGE_LIBS@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_LIBS = @MYSQL_LIBS@
+NCURSES = @NCURSES@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NUMA_LIBS = @NUMA_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PAM_DIR = @PAM_DIR@
+PAM_LIBS = @PAM_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PROCTRACKDIR = @PROCTRACKDIR@
+PROJECT = @PROJECT@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+READLINE_LIBS = @READLINE_LIBS@
+REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
+RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
+SED = @SED@
+SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SLURMCTLD_PORT = @SLURMCTLD_PORT@
+SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@
+SLURMDBD_PORT = @SLURMDBD_PORT@
+SLURMD_PORT = @SLURMD_PORT@
+SLURM_API_AGE = @SLURM_API_AGE@
+SLURM_API_CURRENT = @SLURM_API_CURRENT@
+SLURM_API_MAJOR = @SLURM_API_MAJOR@
+SLURM_API_REVISION = @SLURM_API_REVISION@
+SLURM_API_VERSION = @SLURM_API_VERSION@
+SLURM_MAJOR = @SLURM_MAJOR@
+SLURM_MICRO = @SLURM_MICRO@
+SLURM_MINOR = @SLURM_MINOR@
+SLURM_PREFIX = @SLURM_PREFIX@
+SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@
+SLURM_VERSION_STRING = @SLURM_VERSION_STRING@
+SO_LDFLAGS = @SO_LDFLAGS@
+SSL_CPPFLAGS = @SSL_CPPFLAGS@
+SSL_LDFLAGS = @SSL_LDFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+UTIL_LIBS = @UTIL_LIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+ac_have_man2html = @ac_have_man2html@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lua_CFLAGS = @lua_CFLAGS@
+lua_LIBS = @lua_LIBS@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+man1_MANS = \
+ sacct.1 \
+ sacctmgr.1 \
+ salloc.1 \
+ sattach.1 \
+ sbatch.1 \
+ sbcast.1 \
+ scancel.1 \
+ scontrol.1 \
+ sdiag.1 \
+ sinfo.1 \
+ slurm.1 \
+ smap.1 \
+ sprio.1 \
+ squeue.1 \
+ sreport.1 \
+ srun.1 \
+ srun_cr.1 \
+ sshare.1 \
+ sstat.1 \
+ strigger.1 \
+ sview.1
+
+EXTRA_DIST = $(man1_MANS) $(am__append_1)
+@HAVE_MAN2HTML_TRUE@html_DATA = \
+@HAVE_MAN2HTML_TRUE@ sacct.html \
+@HAVE_MAN2HTML_TRUE@ sacctmgr.html \
+@HAVE_MAN2HTML_TRUE@ salloc.html \
+@HAVE_MAN2HTML_TRUE@ sattach.html \
+@HAVE_MAN2HTML_TRUE@ sbatch.html \
+@HAVE_MAN2HTML_TRUE@ sbcast.html \
+@HAVE_MAN2HTML_TRUE@ scancel.html \
+@HAVE_MAN2HTML_TRUE@ scontrol.html \
+@HAVE_MAN2HTML_TRUE@ sdiag.html \
+@HAVE_MAN2HTML_TRUE@ sinfo.html \
+@HAVE_MAN2HTML_TRUE@ smap.html \
+@HAVE_MAN2HTML_TRUE@ sprio.html \
+@HAVE_MAN2HTML_TRUE@ squeue.html \
+@HAVE_MAN2HTML_TRUE@ sreport.html \
+@HAVE_MAN2HTML_TRUE@ srun.html \
+@HAVE_MAN2HTML_TRUE@ srun_cr.html \
+@HAVE_MAN2HTML_TRUE@ sshare.html \
+@HAVE_MAN2HTML_TRUE@ sstat.html \
+@HAVE_MAN2HTML_TRUE@ strigger.html \
+@HAVE_MAN2HTML_TRUE@ sview.html
+
+@HAVE_MAN2HTML_TRUE@MOSTLYCLEANFILES = ${html_DATA}
+@HAVE_MAN2HTML_TRUE@SUFFIXES = .html
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .html .1
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu doc/man/man1/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu doc/man/man1/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-man1: $(man1_MANS)
+ @$(NORMAL_INSTALL)
+ test -z "$(man1dir)" || $(MKDIR_P) "$(DESTDIR)$(man1dir)"
+ @list='$(man1_MANS)'; test -n "$(man1dir)" || exit 0; \
+ { for i in $$list; do echo "$$i"; done; \
+ } | while read p; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; echo "$$p"; \
+ done | \
+ sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+ sed 'N;N;s,\n, ,g' | { \
+ list=; while read file base inst; do \
+ if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+ echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+ $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
+ fi; \
+ done; \
+ for i in $$list; do echo "$$i"; done | $(am__base_list) | \
+ while read files; do \
+ test -z "$$files" || { \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
+ done; }
+
+uninstall-man1:
+ @$(NORMAL_UNINSTALL)
+ @list='$(man1_MANS)'; test -n "$(man1dir)" || exit 0; \
+ files=`{ for i in $$list; do echo "$$i"; done; \
+ } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+ dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir)
+install-htmlDATA: $(html_DATA)
+ @$(NORMAL_INSTALL)
+ test -z "$(htmldir)" || $(MKDIR_P) "$(DESTDIR)$(htmldir)"
+ @list='$(html_DATA)'; test -n "$(htmldir)" || list=; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \
+ done
+
+uninstall-htmlDATA:
+ @$(NORMAL_UNINSTALL)
+ @list='$(html_DATA)'; test -n "$(htmldir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(htmldir)'; $(am__uninstall_files_from_dir)
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+ @list='$(MANS)'; if test -n "$$list"; then \
+ list=`for p in $$list; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \
+ if test -n "$$list" && \
+ grep 'ab help2man is required to generate this page' $$list >/dev/null; then \
+ echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \
+ grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \
+ echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \
+ echo " typically \`make maintainer-clean' will remove them" >&2; \
+ exit 1; \
+ else :; fi; \
+ else :; fi
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(MANS) $(DATA)
+installdirs:
+ for dir in "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(htmldir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+ -test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-htmlDATA install-man
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man: install-man1
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-htmlDATA uninstall-man
+
+uninstall-man: uninstall-man1
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+ distclean distclean-generic distclean-libtool distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-htmlDATA install-info install-info-am install-man \
+ install-man1 install-pdf install-pdf-am install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+ ps ps-am uninstall uninstall-am uninstall-htmlDATA \
+ uninstall-man uninstall-man1
+
+
+@HAVE_MAN2HTML_TRUE@.1.html:
+@HAVE_MAN2HTML_TRUE@ `dirname $<`/../man2html.py $(srcdir)/../../html/header.txt $(srcdir)/../../html/footer.txt $<
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1
index 912b585..c708fa5 100644
--- a/doc/man/man1/sacct.1
+++ b/doc/man/man1/sacct.1
@@ -29,6 +29,13 @@
with the \f3\-\-allusers\fP, \f3\-\-user\fP, or \f3\-\-uid\fP options.
.TP "7"
\f3Note: \fP\c
+If designated, the slurmdbd.conf option PrivateData may further
+restrict the accounting data visible to users which are not
+SlurmUser, root, or a user with AdminLevel=Admin. See the
+slurmdbd.conf man page for additional details on restricting
+access to accounting data.
+.TP
+\f3Note: \fP\c
If the AccountingStorageType is set to "accounting_storage/filetxt",
space characters embedded within account names, job names, and step names
will be replaced by underscores. If account names with embedded spaces are
@@ -247,6 +254,11 @@
.IP
.TP
+\f3\-\-name=\fP\f2jobname_list\fP
+Display jobs that have any of these name(s).
+.IP
+
+.TP
\f3\-o\fP\f3,\fP \f3\-\-format\fP
Comma separated list of fields. (use "\-\-helpformat" for a list of
available fields).
@@ -787,7 +799,7 @@
UTC, January 1, 1970)
.TP
5
-\f3uid.gid\fP
+blockid (if job ran on a BlueGene cluster)
.TP
6
(Reserved)
@@ -845,7 +857,7 @@
UTC, January 1, 1970)
.TP
5
-\f3uid.gid\fP
+blockid (if job ran on a BlueGene cluster)
.TP
6
(Reserved)
@@ -995,7 +1007,7 @@
UTC, January 1, 1970)
.TP
5
-\f3uid.gid\fP
+blockid (if job ran on a BlueGene cluster)
.TP
6
(Reserved)
diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1
index 34447e7..730dbd1 100644
--- a/doc/man/man1/sacctmgr.1
+++ b/doc/man/man1/sacctmgr.1
@@ -271,6 +271,14 @@
they are able to run inside the limit.
.TP
+\fIGrpCPURunMins\fP=<max cpu run minutes>
+Maximum number of CPU minutes all jobs
+running with this association and all its child associations can run
+at the same time. This takes into consideration time limit of running
+jobs. If the limit is reached no new jobs are started until other
+jobs finish to allow time to free up.
+
+.TP
\fIGrpCPUs\fP=<max cpus>
Maximum number of CPUs running jobs are able to be allocated in aggregate for
this association and all associations which are children of this association.
@@ -287,6 +295,16 @@
To clear a previously set value use the modify command with a new value of \-1.
.TP
+\fIGrpMemory\fP=<max memory (MB) >
+Maximum amount of memory running jobs are able to be allocated in aggregate for
+this association and all associations which are children of this association.
+To clear a previously set value use the modify command with a new
+value of \-1.
+.P
+NOTE: This limit only applies fully when using the Select Consumable
+Resource plugin.
+
+.TP
\fIGrpNodes\fP=<max nodes>
Maximum number of nodes running jobs are able to be allocated in aggregate for
this association and all associations which are children of this association.
@@ -555,6 +573,14 @@
of this association.
.TP
+\fIGrpCPURunMins\fP
+Maximum number of CPU minutes all jobs
+running with this association and all its child associations can run
+at the same time. This takes into consideration time limit of running
+jobs. If the limit is reached no new jobs are started until other
+jobs finish to allow time to free up.
+
+.TP
\fIGrpCPUs\fP
Maximum number of CPUs running jobs are able to be allocated in aggregate for
this association and all associations which are children of this association.
@@ -932,6 +958,12 @@
\fIPartitionTimeLimit\fP
If set jobs using this QOS will be able to
override the requested partition's TimeLimit.
+.TP
+\fIRequiresReservation\fP
+If set jobs using this QOS must designate a reservation when submitting a job.
+This option can be useful in restricting usage of a QOS that may have greater
+preemptive capability or additional resources to be allowed only within a
+reservation.
.RE
.TP
@@ -945,6 +977,13 @@
aggregate for this QOS.
.TP
+\fIGrpCPURunMins\fP Maximum number of CPU minutes all jobs
+running with this QOS can run at the same time. This takes into
+consideration time limit of running jobs. If the limit is reached
+no new jobs are started until other jobs finish to allow time to
+free up.
+
+.TP
\fIGrpCPUs\fP
Maximum number of CPUs running jobs are able to be allocated in aggregate for
this QOS.
diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1
index fce3bcb..947c967 100644
--- a/doc/man/man1/salloc.1
+++ b/doc/man/man1/salloc.1
@@ -1,4 +1,4 @@
-.TH "salloc" "1" "SLURM 2.3" "August 2011" "SLURM Commands"
+.TH "salloc" "1" "SLURM 2.4" "October 2011" "SLURM Commands"
.SH "NAME"
salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command,
@@ -21,8 +21,8 @@
\fBSallocDefaultCommand\fR is not set, then \fBsalloc\fR runs the
user's default shell.
-The following document describes the the influence of various options on the
-allocation of cpus to jobs and tasks.
+The following document describes the influence of various options on the
+allocation of cpus to jobs and tasks.
.br
http://www.schedmd.com/slurmdocs/cpu_management.html
@@ -136,31 +136,53 @@
.TP
\fB\-C\fR, \fB\-\-constraint\fR=<\fIlist\fR>
-Specify a list of constraints.
-The constraints are features that have been assigned to the nodes by
-the slurm administrator.
-The \fIlist\fR of constraints may include multiple features separated
-by ampersand (AND) and/or vertical bar (OR) operators.
-For example: \fB\-\-constraint="opteron&video"\fR or
-\fB\-\-constraint="fast|faster"\fR.
-In the first example, only nodes having both the feature "opteron" AND
-the feature "video" will be used.
-There is no mechanism to specify that you want one node with feature
-"opteron" and another node with feature "video" in case no
-node has both features.
+Nodes can have \fBfeatures\fR assigned to them by the SLURM administrator.
+Users can specify which of these \fBfeatures\fR are required by their job
+using the constraint option.
+Only nodes having features matching the job constraints will be used to
+satisfy the request.
+Multiple constraints may be specified with AND, OR, exclusive OR,
+resource counts, etc.
+Supported \fBconstraint\fR options include:
+.PD 1
+.RS
+.TP
+\fBSingle Name\fR
+Only nodes which have the specified feature will be used.
+For example, \fB\-\-constraint="intel"\fR
+.TP
+\fBNode Count\fR
+A request can specify the number of nodes needed with some feature
+by appending an asterisk and count after the feature name.
+For example "\fB\-\-nodes=16 \-\-constraint=graphics*4 ..."\fR
+indicates that the job requires 16 nodes and that at least four of those
+nodes must have the feature "graphics."
+.TP
+\fBAND\fR
+Only nodes with all of the specified features will be used.
+The ampersand is used for an AND operator.
+For example, \fB\-\-constraint="intel&gpu"\fR
+.TP
+\fBOR\fR
+Only nodes with at least one of the specified features will be used.
+The vertical bar is used for an OR operator.
+For example, \fB\-\-constraint="intel|amd"\fR
+.TP
+\fBExclusive OR\fR
If only one of a set of possible options should be used for all allocated
nodes, then use the OR operator and enclose the options within square brackets.
For example: "\fB\-\-constraint=[rack1|rack2|rack3|rack4]"\fR might
be used to specify that all nodes must be allocated on a single rack of
the cluster, but any of those four racks can be used.
-A request can also specify the number of nodes needed with some feature
-by appending an asterisk and count after the feature name.
-For example "\fBsalloc \-\-nodes=16 \-\-constraint=graphics*4 ..."\fR
-indicates that the job requires 16 nodes at that at least four of those
-nodes must have the feature "graphics."
-Constraints with node counts may only be combined with AND operators.
-If no nodes have the requested features, then the job will be rejected
-by the slurm job manager.
+.TP
+\fBMultiple Counts\fR
+Specific counts of multiple resources may be specified by using the AND
+operator and enclosing the options within square brackets.
+For example: "\fB\-\-constraint=[rack1*2&rack2*4]"\fR might
+be used to specify that two nodes must be allocated from nodes with the feature
+of "rack1" and four nodes must be allocated from nodes with the feature
+"rack2".
+.RE
.TP
\fB\-\-contiguous\fR
@@ -187,9 +209,9 @@
The following informational environment variables are set when \fB\-\-cpu_bind\fR
is in use:
.nf
- SLURM_CPU_BIND_VERBOSE
- SLURM_CPU_BIND_TYPE
- SLURM_CPU_BIND_LIST
+ SLURM_CPU_BIND_VERBOSE
+ SLURM_CPU_BIND_TYPE
+ SLURM_CPU_BIND_LIST
.fi
See the \fBENVIRONMENT VARIABLE\fR section for a more detailed description
@@ -431,7 +453,10 @@
If no argument is given, resources must be available immediately
for the request to succeed.
By default, \fB\-\-immediate\fR is off, and the command
-will block until resources become available.
+will block until resources become available. Since this option's
+argument is optional, for proper parsing the single letter option must
+be followed immediately with the value and not include a space between
+them. For example "\-I60" and not "\-I 60".
.TP
\fB\-J\fR, \fB\-\-job\-name\fR=<\fIjobname\fR>
@@ -454,8 +479,11 @@
couple of reasons: someone used \fBscancel\fR to revoke the allocation,
or the allocation reached its time limit. If you do not specify a signal
name or number and SLURM is configured to signal the spawned command at job
-termination, the default signal is SIGHUP for interactive and SIGTERM for
-non\-interactive sessions.
+termination, the default signal is SIGHUP for interactive and SIGTERM for
+non\-interactive sessions. Since this option's argument is optional,
+for proper parsing the single letter option must be followed
+immediately with the value and not include a space between them. For
+example "\-K1" and not "\-K 1".
.TP
\fB\-k\fR, \fB\-\-no\-kill\fR
@@ -545,8 +573,13 @@
requested and be one per line or comma separated. If specifying a
task count (\fB\-n\fR, \fB\-\-ntasks\fR=<\fInumber\fR>), your tasks
will be laid out on the nodes in the order of the file.
-.TP
+.br
+\fBNOTE:\fR The arbitrary distribution option on a job allocation only
+controls the nodes to be allocated to the job and not the allocation of
+CPUs on those nodes. This option is meant primarily to control a job step's
+task layout in an existing job allocation for the srun command.
+.TP
Second distribution method:
.TP
.B block
@@ -581,6 +614,10 @@
are allocated to jobs (\fBSelectType=select/linear\fR).
Also see \fB\-\-mem\-per\-cpu\fR.
\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
+NOTE: Enforcement of memory limits currently requires enabling of
+accounting, which samples memory use on a periodic basis (data need
+not be stored, just collected). A task may exceed the memory limit
+until the next periodic accounting sample.
.TP
\fB\-\-mem\-per\-cpu\fR=<\fIMB\fR>
@@ -591,7 +628,7 @@
Note that if the job's \fB\-\-mem\-per\-cpu\fR value exceeds the configured
\fBMaxMemPerCPU\fR, then the user's limit will be treated as a memory limit
per task; \fB\-\-mem\-per\-cpu\fR will be reduced to a value no larger than
-\fBMaxMemPerCPU\fR; \fB\-\-cpus\-per\-task\fR will be set and value of
+\fBMaxMemPerCPU\fR; \fB\-\-cpus\-per\-task\fR will be set and value of
\fB\-\-cpus\-per\-task\fR multiplied by the new \fB\-\-mem\-per\-cpu\fR
value will equal the original \fB\-\-mem\-per\-cpu\fR value specified by
the user.
@@ -622,9 +659,9 @@
\fB\-\-mem_bind\fR is in use:
.nf
- SLURM_MEM_BIND_VERBOSE
- SLURM_MEM_BIND_TYPE
- SLURM_MEM_BIND_LIST
+ SLURM_MEM_BIND_VERBOSE
+ SLURM_MEM_BIND_TYPE
+ SLURM_MEM_BIND_LIST
.fi
See the \fBENVIRONMENT VARIABLES\fR section for a more detailed description
@@ -786,7 +823,7 @@
SLURM job is subject to the normal constraints on jobs, including time
limits, so that eventually the job will terminate and the resources
will be freed, or you can terminate the job manually using the
-\fBscancel\fR command.)
+\fBscancel\fR command.)
.TP
\fB\-O\fR, \fB\-\-overcommit\fR
@@ -854,7 +891,7 @@
desired for the job allocation and optionally the maximum time to wait
for that number of switches. If SLURM finds an allocation containing more
switches than the count specified, the job remains pending until it either finds
-an allocation with desired switch count or the time limit expires.
+an allocation with desired switch count or the time limit expires.
It there is no switch count limit, there is no delay in starting the job.
Acceptable time formats include "minutes", "minutes:seconds",
"hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and
@@ -869,8 +906,8 @@
Set a limit on the total run time of the job allocation. If the
requested time limit exceeds the partition's time limit, the job will
be left in a PENDING state (possibly indefinitely). The default time
-limit is the partition's time limit. When the time limit is reached,
-the each task in each job step is sent SIGTERM followed by SIGKILL. The
+limit is the partition's default time limit. When the time limit is reached,
+each task in each job step is sent SIGTERM followed by SIGKILL. The
interval between signals is specified by the SLURM configuration
parameter \fBKillWait\fR. A time limit of zero requests that no time
limit be imposed. Acceptable time formats include "minutes",
@@ -888,12 +925,12 @@
Set a minimum time limit on the job allocation.
If specified, the job may have it's \fB\-\-time\fR limit lowered to a value
no lower than \fB\-\-time\-min\fR if doing so permits the job to begin
-execution earlier than otherwise possible.
+execution earlier than otherwise possible.
The job's time limit will not be changed after the job is allocated resources.
-This is performed by a backfill scheduling algorithm to allocate resources
+This is performed by a backfill scheduling algorithm to allocate resources
otherwise reserved for higher priority jobs.
-Acceptable time formats include "minutes", "minutes:seconds",
-"hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and
+Acceptable time formats include "minutes", "minutes:seconds",
+"hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and
"days\-hours:minutes:seconds".
.TP
@@ -975,23 +1012,28 @@
.TP
\fB\-\-conn\-type\fR=<\fItype\fR>
-Require the partition connection type to be of a certain type.
+Require the block connection type to be of a certain type.
On Blue Gene the acceptable of \fItype\fR are MESH, TORUS and NAV.
-If NAV, or if not set, then SLURM will try to fit a TORUS else MESH.
+If NAV, or if not set, then SLURM will try to fit what the
+DefaultConnType is set to in the bluegene.conf; if that isn't set, the
+default is TORUS.
You should not normally set this option.
-SLURM will normally allocate a TORUS if possible for a given geometry.
If running on a BGP system and wanting to run in HTC mode (only for 1
midplane and below). You can use HTC_S for SMP, HTC_D for Dual, HTC_V
for virtual node mode, and HTC_L for Linux mode.
-A comma separated lists of connection types may be specified, one for each dimension.
+For systems that allow a different connection type per dimension you
+can supply a comma separated list of connection types, one for
+each dimension (i.e. M,T,T,T will give you a torus connection in all
+dimensions except the first).
.TP
\fB\-g\fR, \fB\-\-geometry\fR=<\fIXxYxZ\fR>
Specify the geometry requirements for the job. The three numbers
represent the required geometry giving dimensions in the X, Y and
Z directions. For example "\-\-geometry=2x3x4", specifies a block
-of nodes having 2 x 3 x 4 = 24 nodes (actually base partitions on
-Blue Gene).
+of nodes having 2 x 3 x 4 = 24 nodes (actually midplanes on
+Blue Gene). On a BGQ system the dimensions are AxXxYxZ, and can not
+be used to allocate sub-blocks.
.TP
\fB\-\-ioload\-image\fR=<\fIpath\fR>
@@ -1144,6 +1186,12 @@
\fBSLURM_SUBMIT_DIR\fR
The directory from which \fBsalloc\fR was invoked.
.TP
+\fBSLURM_NODE_ALIASES\fR
+Sets of node name, communication address and hostname for nodes allocated to
+the job from the cloud. Each element in the set is colon separated and each
+set is comma separated. For example:
+SLURM_NODE_ALIASES=ec0:1.2.3.4:foo,ec1:1.2.3.5:bar
+.TP
\fBSLURM_NTASKS_PER_NODE\fR
Set to value of the \-\-ntasks\-per\-node\fR option, if specified.
.TP
diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1
index 750d2a2..38385f4 100644
--- a/doc/man/man1/sbatch.1
+++ b/doc/man/man1/sbatch.1
@@ -1,4 +1,4 @@
-.TH "sbatch" "1" "SLURM 2.3" "August 2011" "SLURM Commands"
+.TH "sbatch" "1" "SLURM 2.4" "October 2011" "SLURM Commands"
.SH "NAME"
sbatch \- Submit a batch script to SLURM.
@@ -145,31 +145,53 @@
.TP
\fB\-C\fR, \fB\-\-constraint\fR=<\fIlist\fR>
-Specify a list of constraints.
-The constraints are features that have been assigned to the nodes by
-the slurm administrator.
-The \fIlist\fR of constraints may include multiple features separated
-by ampersand (AND) and/or vertical bar (OR) operators.
-For example: \fB\-\-constraint="opteron&video"\fR or
-\fB\-\-constraint="fast|faster"\fR.
-In the first example, only nodes having both the feature "opteron" AND
-the feature "video" will be used.
-There is no mechanism to specify that you want one node with feature
-"opteron" and another node with feature "video" in case no
-node has both features.
+Nodes can have \fBfeatures\fR assigned to them by the SLURM administrator.
+Users can specify which of these \fBfeatures\fR are required by their job
+using the constraint option.
+Only nodes having features matching the job constraints will be used to
+satisfy the request.
+Multiple constraints may be specified with AND, OR, exclusive OR,
+resource counts, etc.
+Supported \fBconstraint\fR options include:
+.PD 1
+.RS
+.TP
+\fBSingle Name\fR
+Only nodes which have the specified feature will be used.
+For example, \fB\-\-constraint="intel"\fR
+.TP
+\fBNode Count\fR
+A request can specify the number of nodes needed with some feature
+by appending an asterisk and count after the feature name.
+For example "\fB\-\-nodes=16 \-\-constraint=graphics*4 ..."\fR
+indicates that the job requires 16 nodes and that at least four of those
+nodes must have the feature "graphics."
+.TP
+\fBAND\fR
+Only nodes with all of the specified features will be used.
+The ampersand is used for an AND operator.
+For example, \fB\-\-constraint="intel&gpu"\fR
+.TP
+\fBOR\fR
+Only nodes with at least one of the specified features will be used.
+The vertical bar is used for an OR operator.
+For example, \fB\-\-constraint="intel|amd"\fR
+.TP
+\fBExclusive OR\fR
If only one of a set of possible options should be used for all allocated
nodes, then use the OR operator and enclose the options within square brackets.
For example: "\fB\-\-constraint=[rack1|rack2|rack3|rack4]"\fR might
be used to specify that all nodes must be allocated on a single rack of
the cluster, but any of those four racks can be used.
-A request can also specify the number of nodes needed with some feature
-by appending an asterisk and count after the feature name.
-For example "\fBsbatch \-\-nodes=16 \-\-constraint=graphics*4 ..."\fR
-indicates that the job requires 16 nodes and that at least four of those
-nodes must have the feature "graphics."
-Constraints with node counts may only be combined with AND operators.
-If no nodes have the requested features, then the job will be rejected
-by the slurm job manager.
+.TP
+\fBMultiple Counts\fR
+Specific counts of multiple resources may be specified by using the AND
+operator and enclosing the options within square brackets.
+For example: "\fB\-\-constraint=[rack1*2&rack2*4]"\fR might
+be used to specify that two nodes must be allocated from nodes with the feature
+of "rack1" and four nodes must be allocated from nodes with the feature
+"rack2".
+.RE
.TP
\fB\-\-contiguous\fR
@@ -473,6 +495,7 @@
necessary to grant its job allocation are immediately available. If the
job allocation will have to wait in a queue of pending jobs, the batch script
will not be submitted.
+NOTE: There is limited support for this option with batch jobs.
.TP
\fB\-i\fR, \fB\-\-input\fR=<\fIfilename pattern\fR>
@@ -488,12 +511,11 @@
a percent sign "%" followed by a letter (e.g. %j).
Supported replacement symbols are:
-.PD 0
-.RS 10
+.PD
+.RS
.TP
\fB%j\fR
Job allocation number.
-.PD 0
.TP
\fB%N\fR
Node name. Only one file is created, so %N will be replaced by the name of the
@@ -609,8 +631,13 @@
requested and be one per line or comma separated. If specifying a
task count (\fB\-n\fR, \fB\-\-ntasks\fR=<\fInumber\fR>), your tasks
will be laid out on the nodes in the order of the file.
-.TP
+.br
+\fBNOTE:\fR The arbitrary distribution option on a job allocation only
+controls the nodes to be allocated to the job and not the allocation of
+CPUs on those nodes. This option is meant primarily to control a job step's
+task layout in an existing job allocation for the srun command.
+.TP
Second distribution method:
.TP
.B block
@@ -645,6 +672,10 @@
are allocated to jobs (\fBSelectType=select/linear\fR).
Also see \fB\-\-mem\-per\-cpu\fR.
\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
+NOTE: Enforcement of memory limits currently requires enabling of
+accounting, which samples memory use on a periodic basis (data need
+not be stored, just collected). A task may exceed the memory limit
+until the next periodic accounting sample.
.TP
\fB\-\-mem\-per\-cpu\fR=<\fIMB\fR>
@@ -993,7 +1024,7 @@
Set a limit on the total run time of the job allocation. If the
requested time limit exceeds the partition's time limit, the job will
be left in a PENDING state (possibly indefinitely). The default time
-limit is the partition's time limit. When the time limit is reached,
+limit is the partition's default time limit. When the time limit is reached,
each task in each job step is sent SIGTERM followed by SIGKILL. The
interval between signals is specified by the SLURM configuration
parameter \fBKillWait\fR. A time limit of zero requests that no time
@@ -1107,23 +1138,28 @@
.TP
\fB\-\-conn\-type\fR=<\fItype\fR>
-Require the partition connection type to be of a certain type.
+Require the block connection type to be of a certain type.
On Blue Gene the acceptable of \fItype\fR are MESH, TORUS and NAV.
-If NAV, or if not set, then SLURM will try to fit a TORUS else MESH.
+If NAV, or if not set, then SLURM will try to fit what the
+DefaultConnType is set to in the bluegene.conf; if that isn't set, the
+default is TORUS.
You should not normally set this option.
-SLURM will normally allocate a TORUS if possible for a given geometry.
If running on a BGP system and wanting to run in HTC mode (only for 1
midplane and below). You can use HTC_S for SMP, HTC_D for Dual, HTC_V
for virtual node mode, and HTC_L for Linux mode.
-A comma separated lists of connection types may be specified, one for each dimension.
+For systems that allow a different connection type per dimension you
+can supply a comma separated list of connection types, one for
+each dimension (i.e. M,T,T,T will give you a torus connection in all
+dimensions except the first).
.TP
\fB\-g\fR, \fB\-\-geometry\fR=<\fIXxYxZ\fR>
Specify the geometry requirements for the job. The three numbers
represent the required geometry giving dimensions in the X, Y and
Z directions. For example "\-\-geometry=2x3x4", specifies a block
-of nodes having 2 x 3 x 4 = 24 nodes (actually base partitions on
-Blue Gene).
+of nodes having 2 x 3 x 4 = 24 nodes (actually midplanes on
+Blue Gene). On a BGQ system the dimensions are AxXxYxZ, and can not
+be used to allocate sub-blocks.
.TP
\fB\-\-ioload\-image\fR=<\fIpath\fR>
@@ -1299,6 +1335,12 @@
\fBMPIRUN_NOFREE\fR
Do not free a block on Blue Gene systems only.
.TP
+\fBSLURM_NODE_ALIASES\fR
+Sets of node name, communication address and hostname for nodes allocated to
+the job from the cloud. Each element in the set is colon separated and each
+set is comma separated. For example:
+SLURM_NODE_ALIASES=ec0:1.2.3.4:foo,ec1:1.2.3.5:bar
+.TP
\fBSLURM_NTASKS_PER_CORE\fR
Number of tasks requested per core.
Only set if the \fB\-\-ntasks\-per\-core\fR option is specified.
diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1
index 6ea932a..06221ef 100644
--- a/doc/man/man1/scontrol.1
+++ b/doc/man/man1/scontrol.1
@@ -1,4 +1,4 @@
-.TH SCONTROL "1" "July 2011" "scontrol 2.3" "Slurm components"
+.TH SCONTROL "1" "August 2011" "scontrol 2.4" "Slurm components"
.SH "NAME"
scontrol \- Used view and modify Slurm configuration and state.
@@ -30,7 +30,9 @@
group.
.TP
\fB\-d\fR, \fB\-\-details\fR
-Causes the \fBshow\fR command to provide additional details where available.
+Causes the \fIshow\fR command to provide additional details where available.
+Repeating the option more than once (e.g., "\-dd") will cause the \fIshow job\fR
+command to also list the batch script, if the job was a batch job.
.TP
\fB\-h\fR, \fB\-\-help\fR
Print a help message describing the usage of scontrol.
@@ -151,8 +153,6 @@
.TP
\fBdetails\fP
Causes the \fIshow\fP command to provide additional details where available.
-Batch job information will include the batch script for jobs the user is
-authorized to view.
Job information will include CPUs and NUMA memory allocated on each node.
Note that on computers with hyperthreading enabled and SLURM configured to
allocate cores, each listed CPU represents one physical core.
@@ -161,7 +161,8 @@
See the \fB\-\-cpu_bind\fR and \fB\-\-mem_bind\fR option descriptions in
srun man pages for more information.
The \fBdetails\fP option is currently only supported for the \fIshow job\fP
-command.
+command. To also list the batch script for batch jobs, in addition to the
+details, use the \fBscript\fP option described below instead of this option.
.TP
\fBexit\fP
@@ -232,6 +233,12 @@
Terminate the execution of scontrol.
.TP
+\fBreboot_nodes\fP [\fINodeList\fP]
+Reboot all nodes in the system when they become idle using the
+\fBRebootProgram\fP as configured in SLURM's slurm.conf file.
+Accepts an optional list of nodes to reboot. By default all nodes are rebooted.
+
+.TP
\fBreconfigure\fP
Instruct all Slurm daemons to re\-read the configuration file.
This command does not restart the daemons.
@@ -270,6 +277,12 @@
SlurmSchedLogLevel parameter is present.
.TP
+\fBscript\fP
+Causes the \fIshow job\fP command to list the batch script for batch
+jobs in addition to the detail information described under the
+\fBdetails\fP option above.
+
+.TP
\fBsetdebug\fP \fILEVEL\fP
Change the debug level of the slurmctld daemon.
\fILEVEL\fP may be an integer value between zero and nine (using the
@@ -295,7 +308,7 @@
\fIENTITY\fP may be \fIaliases\fP, \fIconfig\fP, \fIdaemons\fP, \fIfrontend\fP,
\fIjob\fP, \fInode\fP, \fIpartition\fP, \fIreservation\fP, \fIslurmd\fP,
\fIstep\fP, \fItopology\fP, \fIhostlist\fP or \fIhostnames\fP
-(also \fIblock\fP or \fIsubbp\fP on BlueGene systems).
+(also \fIblock\fP or \fIsubmp\fP on BlueGene systems).
\fIID\fP can be used to identify a specific element of the identified
entity: the configuration parameter name, job ID, node name, partition name,
reservation name, or job step ID for \fIconfig\fP, \fIjob\fP, \fInode\fP,
@@ -1005,6 +1018,13 @@
Possible values are "YES" and "NO".
.TP
+\fIReqResv\fP=<yes|no>
+Specify if only allocation requests designating a reservation will be
+satisfied. This is used to restrict partition usage to be allowed only
+within a reservation.
+Possible values are "YES" and "NO".
+
+.TP
\fIShared\fP=<yes|no|exclusive|force>[:<job_count>]
Specify if nodes in this partition can be shared by multiple jobs.
Possible values are "YES", "NO", "EXCLUSIVE" and "FORCE".
@@ -1055,7 +1075,9 @@
List of accounts permitted to use the reserved nodes.
E.g. Accounts=physcode1,physcode2. A user in any of the accounts
may use the reserved nodes.
-A new reservation must specify Users and/or Accounts.
+A new reservation must specify Users and/or Accounts. You can add or
+remove individual accounts from an existing reservation by adding
+a '+' or '\-' sign before the '=' sign.
.TP
\fILicenses\fP=<license>
@@ -1066,12 +1088,20 @@
Multiple license names should be comma separated (e.g. "Licenses=foo*4,bar").
A new reservation must specify one or more resource to be included: NodeCnt,
Nodes and/or Licenses.
+If a reservation includes Licenses, but no NodeCnt or Nodes, then the option
+\fIFlags=LICENSE_ONLY\fP must also be specified.
.TP
-\fINodeCnt\fP=<num>
-Identify number of nodes to be reserved.
-On BlueGene systems, this number represents a cnode (compute node) count and
-will be rounded up as needed to represent whole nodes (midplanes).
+\fINodeCnt\fP=<num>[,num,...]
+Identify number of nodes to be reserved. The number can include a suffix of
+"k" or "K", in which case the number specified is multiplied by 1024.
+On BlueGene systems, this number represents a c\-node (compute node) count and
+will be rounded up as needed to reserve whole nodes (midplanes).
+In order to optimize the topology of the resource allocation on a new
+reservation (not on an updated reservation), specific sizes
+required for the reservation may be specified. For example, if you want to
+reserve 4096 c\-nodes on a BlueGene system that can be used to allocate two
+jobs each with 2048 c\-nodes, specify "NodeCnt=2k,2k".
A new reservation must specify one or more resource to be included: NodeCnt,
Nodes and/or Licenses.
@@ -1081,7 +1111,7 @@
may be specified using simple node range expressions (e.g. "Nodes=lx[10\-20]").
Specify a blank data value to remove all nodes from a reservation: "Nodes=".
A new reservation must specify one or more resource to be included: NodeCnt,
-Nodes and/or Licenses.
+Nodes and/or Licenses. A specification of "ALL" will reserve all nodes.
.TP
\fIStartTime\fP=<time_spec>
@@ -1122,9 +1152,9 @@
.TP
\fIFlags\fP=<flags>
Flags associated with the reservation.
-In order to remove a flag with the update option, precede the name with
-a minus sign. For example: Flags=\-DAILY (NOTE: this option is not supported
-for all flags).
+You can add or remove individual flags from an existing reservation by
+adding a '+' or '\-' sign before the '=' sign. For example:
+Flags\-=DAILY (NOTE: this shortcut is not supported for all flags).
Currently supported flags include:
.RS
.TP 12
@@ -1157,6 +1187,11 @@
.TP
\fISPEC_NODES\fR
Reservation is for specific nodes (output only)
+.TP
+\fISTATIC_ALLOC\fR
+Make it so after the nodes are selected for a reservation they don't
+change. Without this option when nodes are selected for a reservation
+and one goes down the reservation will select a new node to fill the spot.
.RE
.TP
@@ -1170,27 +1205,50 @@
\fIUsers\fP=<user list>
List of users permitted to use the reserved nodes.
E.g. Users=jones1,smith2.
-A new reservation must specify Users and/or Accounts.
+A new reservation must specify Users and/or Accounts. You can add or
+remove individual users from an existing reservation by adding a '+'
+or '\-' sign before the '=' sign.
.TP
-\fBSPECIFICATIONS FOR UPDATE, BLOCK \fR
+\fBSPECIFICATIONS FOR UPDATE BLOCK/SUBMP \fR
.TP
Bluegene systems only!
.TP
\fIBlockName\fP=<name>
Identify the bluegene block to be updated. This specification is required.
.TP
-\fIState\fP=<free|error|remove>
-This will update the state of a bluegene block to either FREE or ERROR.
-(i.e. update BlockName=RMP0 STATE=ERROR) State error will not allow jobs
-to run on the block. \fBWARNING!!!!\fR This will cancel any
-running job on the block! On dynamically laid out systems REMOVE will
-free and remove the block from the system. If the block is smaller
-than a midplane every block on that midplane will be removed.
+\fIState\fP=<free|error|recreate|remove|resume>
+This will update the state of a bluegene block.
+(i.e. update BlockName=RMP0 STATE=ERROR)
+\fBWARNING!!!!\fR With the exception of the RESUME state, all other
+state values will cancel any running job on the block!
+.RS
+.TP 10
+\fIFREE\fP
+Return the block to a free state.
.TP
-\fISubBPName\fP=<name>
+\fIERROR\fP
+Make it so jobs don't run on the block.
+.TP
+\fIRECREATE\fP
+Destroy the current block and create a new one to take its place.
+.TP
+\fIREMOVE\fP
+Free and remove the block from the system. If the block is smaller
+than a midplane every block on that midplane will be removed. (only
+available on dynamically laid out systems)
+.TP
+\fIRESUME\fP
+If a block is in ERROR state RESUME will return the block to its
+previous usable state (FREE or READY).
+.RE
+
+.TP
+\fISubMPName\fP=<name>
Identify the bluegene ionodes to be updated (i.e. bg000[0\-3]). This
specification is required.
+NOTE: Even on BGQ where node names are given in bg0000[00000] format
+this option takes an ionode name bg0000[0].
.TP
.SH "ENVIRONMENT VARIABLES"
diff --git a/doc/man/man1/sdiag.1 b/doc/man/man1/sdiag.1
new file mode 100644
index 0000000..f4161b1
--- /dev/null
+++ b/doc/man/man1/sdiag.1
@@ -0,0 +1,206 @@
+.TH "sdiag" "1" "SLURM 2.4" "December 2011" "SLURM Commands"
+.SH "NAME"
+.LP
+sdiag \- Diagnostic tool for SLURM
+
+.SH "SYNOPSIS"
+.LP
+sdiag
+
+.SH "DESCRIPTION"
+.LP
+sdiag shows information related to slurmctld execution about: threads, agents,
+jobs, and scheduling algorithms. The goal is to obtain data from slurmctld
+behaviour helping to adjust configuration parameters or queues policies. The
+main reason behind it is to understand SLURM behaviour under systems with a high throughput.
+.LP
+It has two execution modes. The default mode \fB\-\-all\fR shows several counters
+and statistics explained later, and there is another execution option
+\fB\-\-reset\fR for resetting those values.
+.LP
+Values are reset at midnight UTC time by default.
+.LP
+The first block of information is related to global slurmctld execution:
+.TP
+\fBServer thread count\fR
+The number of current active slurmctld threads. A high number would mean a high
+load processing events like job submissions, jobs dispatching, jobs completing,
+etc. If this is often close to MAX_SERVER_THREADS it could point to a potential
+bottleneck.
+
+.TP
+\fBAgent queue size\fR
+SLURM design has scalability in mind and sending messages to thousands of nodes
+is not a trivial task. The agent mechanism helps to control communication
+between the slurm daemons and the controller for a best effort. If this value
+is close to MAX_AGENT_CNT there could be some delays affecting jobs management.
+
+.TP
+\fBJobs submitted\fR
+Number of jobs submitted since last reset
+
+.TP
+\fBJobs started\fR
+Number of jobs started since last reset. This includes backfilled jobs.
+
+.TP
+\fBJobs completed\fR
+Number of jobs completed since last reset.
+
+.TP
+\fBJobs canceled\fR
+Number of jobs canceled since last reset.
+
+.TP
+\fBJobs failed\fR
+Number of jobs failed since last reset.
+
+.LP
+The second block of information is related to main scheduling algorithm based
+on jobs priorities. A scheduling cycle implies to get the job_write_lock lock,
+then trying to get resources for jobs pending, starting from the highest
+priority one and going in descending order. Once a job can not get the resources the
+loop keeps going but just for jobs requesting other partitions. Jobs with
+dependencies or affected by accounts limits are not processed.
+
+.TP
+\fBLast cycle\fR
+Time in microseconds for last scheduling cycle.
+
+.TP
+\fBMax cycle\fR
+Time in microseconds for the maximum scheduling cycle since last reset.
+
+.TP
+\fBTotal cycles\fR
+Number of scheduling cycles since last reset. Scheduling is done
+periodically and when a job is submitted or a job is completed.
+
+.TP
+\fBMean cycle\fR
+Mean of scheduling cycles since last reset
+
+.TP
+\fBMean depth cycle\fR
+Mean of cycle depth. Depth means number of jobs processed in a scheduling cycle.
+
+.TP
+\fBCycles per minute\fR
+Counter of scheduling executions per minute
+
+.TP
+\fBLast queue length\fR
+Length of jobs pending queue.
+
+.LP
+The third block of information is related to backfilling scheduling algorithm.
+A backfilling scheduling cycle implies to get locks for jobs, nodes and
+partitions objects then trying to get resources for jobs pending. Jobs are
+processed based on priorities. If a job can not get resources the algorithm
+calculates when it could get them obtaining a future start time for the job.
+Then next job is processed and the algorithm tries to get resources for that
+job but avoiding to affect the \fIprevious ones\fR, and again it calculates
+the future start time if not current resources available. The backfilling
+algorithm takes more time for each new job to process since more priority jobs
+can not be affected. The algorithm itself takes measures for avoiding a long
+execution cycle and for taking all the locks for too long.
+
+.TP
+\fBTotal backfilled jobs (since last slurm start)\fR
+Number of jobs started thanks to backfilling since last slurm start.
+
+.TP
+\fBTotal backfilled jobs (since last stats cycle start)\fR
+Number of jobs started thanks to backfilling since last time stats were reset.
+By default these values are reset at midnight UTC time.
+
+.TP
+\fBTotal cycles\fR
+Number of scheduling cycles since last reset
+
+.TP
+\fBLast cycle when\fR
+Time when last execution cycle happened in format
+"weekday Month MonthDay hour:minute.seconds year"
+
+.TP
+\fBLast cycle\fR
+Time in microseconds of last backfilling cycle. It counts only execution time
+removing sleep time inside a scheduling cycle when it takes too much.
+
+.TP
+\fBMax cycle\fR
+Time in microseconds of maximum backfilling cycle execution since last reset
+
+.TP
+\fBMean cycle\fR
+Mean of backfilling scheduling cycles in microseconds since last reset
+
+
+.TP
+\fBLast depth cycle\fR
+Number of processed jobs during last backfilling scheduling cycle. It counts
+every process even if it has no option to execute due to dependencies or limits.
+
+.TP
+\fBLast depth cycle (try sched)\fR
+Number of processed jobs during last backfilling scheduling cycle. It counts
+only processes with a chance to run waiting for available resources. These
+are the jobs which make the backfilling algorithm heavier.
+
+.TP
+\fBDepth Mean\fR
+Mean of processed jobs during backfilling scheduling cycles since last reset.
+
+.TP
+\fBDepth Mean (try sched)\fR
+Mean of processed jobs during backfilling scheduling cycles since last reset.
+It counts only processes with a chance to run waiting for available resources.
+These are the jobs which make the backfilling algorithm heavier.
+
+.TP
+\fBLast queue length\fR
+Number of jobs pending to be processed by backfilling algorithm. A job appears
+as many times as the number of partitions it requested.
+
+.TP
+\fBQueue length Mean\fR
+Mean of jobs pending to be processed by backfilling algorithm.
+
+.SH "OPTIONS"
+.LP
+
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+Get and report information. This is the default mode of operation.
+
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Print description of options and exit.
+
+.TP
+\fB\-r\fR, \fB\-\-reset\fR
+Reset counters. Only used by user SlurmUser or root.
+
+.TP
+\fB\-\-usage\fR
+Print list of options and exit.
+
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Print current version number and exit.
+
+.SH "COPYING"
+SLURM is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+.LP
+SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+.SH "SEE ALSO"
+.LP
+sinfo(1), squeue(1), scontrol(1), slurm.conf(5),
diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1
index d520899..bd5cae9 100644
--- a/doc/man/man1/sinfo.1
+++ b/doc/man/man1/sinfo.1
@@ -1,4 +1,4 @@
-.TH SINFO "1" "September 2011" "sinfo 2.3" "Slurm components"
+.TH SINFO "1" "April 2012" "sinfo 2.4" "Slurm components"
.SH "NAME"
sinfo \- view information about SLURM nodes and partitions.
@@ -96,10 +96,10 @@
"%N %.5D %9P %6t"
.TP
.I "\-\-long \-\-Node"
-"%N %.5D %9P %11T %.4c %.8z %.6m %.8d %.6w %8f %20R"
+"%N %.5D %9P %11T %.4c %.8z %.6m %.8d %.6w %8f %20E"
.TP
.I "\-\-list\-reasons"
-"%20R %9u %19H %N"
+"%20E %9u %19H %N"
.TP
.I "\-\-long \-\-list\-reasons"
"%20R %12u %19H %6t %N"
@@ -181,15 +181,17 @@
\fB%o\fR
List of node communication addresses
.TP
+\fB%p\fR
+Partition scheduling priority
+.TP
\fB%P\fR
-Partition name
+Partition name followed by "*" for the default partition, also see \fB%R\fR
.TP
\fB%r\fR
Only user root may initiate jobs, "yes" or "no"
.TP
\fB%R\fR
-The reason a node is unavailable (down, drained, draining,
-fail or failing states)
+Partition name, also see \fB%P\fR
.TP
\fB%s\fR
Maximum job size in nodes
diff --git a/doc/man/man1/smap.1 b/doc/man/man1/smap.1
index 1d77b44..c88acb0 100644
--- a/doc/man/man1/smap.1
+++ b/doc/man/man1/smap.1
@@ -9,7 +9,7 @@
.SH "DESCRIPTION"
\fBsmap\fR is used to graphically view job, partition and node information
for a system running SLURM.
-Note that information about nodes and partitions to which a user lacks
+Note that information about nodes and partitions to which you lack
access will always be displayed to avoid obvious gaps in the output.
This is equivalent to the \fB\-\-all\fR option of the \fBsinfo\fR and
\fBsqueue\fR commands.
@@ -21,20 +21,21 @@
.TP
\fB\-D <option>\fR, \fB\-\-display=<option>\fR
-sets the display mode for smap. Showing revelant information about specific
-views and displaying a corresponding node chart. While in any
-display a user can switch by typing a different view letter. This is true in
-all modes except for 'configure mode' user can type 'quit' to exit just
-configure mode. Typing 'exit' will end the configuration mode and exit smap.
-Note that unallocated nodes are indicated by a '.' and nodes in the
-DOWN, DRAINED or FAIL state by a '#'.
+sets the display mode for smap, showing relevant information about the
+selected view and displaying a corresponding node chart. Note that
+unallocated nodes are indicated by a '.' and nodes in the DOWN,
+DRAINED or FAIL state by a '#'. When the \fB\-\-iterate=<seconds>\fR
+option is also selected, you can switch displays by typing a different
+letter from the list below (except 'c').
.RS
.TP 15
.I "b"
Displays information about BlueGene partitions on the system
.TP
.I "c"
-Displays current BlueGene node states and allows users to configure the system.
+Displays current BlueGene node states and allows users to configure
+the system. Type 'quit' to end the configure mode. Type 'exit' to
+end the configuration mode and exit smap.
.TP
.I "j"
Displays information about jobs running on system.
@@ -107,16 +108,16 @@
Print version information and exit.
.SH "INTERACTIVE OPTIONS"
-When using smap in curses mode you can scroll through the different windows
+When using smap in curses mode and when the \fB\-\-iterate=<seconds>\fR
+option is also selected, you can scroll through the different windows
using the arrow keys. The \fBup\fR and \fBdown\fR arrow keys scroll
the window containing the grid, and the \fBleft\fR and \fBright\fR arrow keys
scroll the window containing the text information.
-To change screens when an iterate is set you can use any of the
-options available to the \fB\-D\fR option listed above.
-
-You can also hide or make visible hidden partitions by pressing
-\fBh\fR at any moment when an iterate is set.
+With the iterate option selected, you can use any of the options
+available to the \fB\-D\fR option listed above (except 'c') to change
+screens. You can also hide or make visible hidden partitions by
+pressing 'h' at any moment.
.SH "OUTPUT FIELD DESCRIPTIONS"
.TP
diff --git a/doc/man/man1/sprio.1 b/doc/man/man1/sprio.1
index a0d8532..1e4941f 100644
--- a/doc/man/man1/sprio.1
+++ b/doc/man/man1/sprio.1
@@ -26,7 +26,11 @@
.TP
\fB\-j <job_id_list>\fR, \fB\-\-jobs=<job_id_list>\fR
-Requests a comma separated list of job ids to display. Defaults to all jobs.
+Requests a comma separated list of job ids to display. Defaults to
+all jobs. Since this option's argument is optional, for proper parsing
+the single letter option must be followed immediately with the value
+and not include a space between them. For example "\-j1008,1009" and
+not "\-j 1008,1009".
.TP
\fB\-l\fR, \fB\-\-long\fR
diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1
index b2382f8..e03ee1b 100644
--- a/doc/man/man1/squeue.1
+++ b/doc/man/man1/squeue.1
@@ -49,7 +49,10 @@
The \fB\-\-jobs=<job_id_list>\fR option may be used in conjunction with the
\fB\-\-steps\fR option to print step information about specific jobs.
Note: If a list of job ids is provided, the jobs are displayed even if
-they are on hidden partitions.
+they are on hidden partitions. Since this option's argument is optional,
+for proper parsing the single letter option must be followed immediately
+with the value and not include a space between them. For example "\-j1008"
+and not "\-j 1008".
.TP
\fB\-l\fR, \fB\-\-long\fR
@@ -62,11 +65,9 @@
A value of of '\fIall\fR' will query to run on all clusters.
.TP
-\fB\-n <hostlist>\fR, \fB\-\-nodes=<hostlist>\fR
-Report only on jobs allocated to the specified node or list of nodes.
-This may either be the \fBNodeName\fR or \fBNodeHostname\fR
-as defined in \fBslurm.conf(5)\fR in the event that they differ.
-A node_name of \fBlocalhost\fR is mapped to the current host name.
+\fB\-n\fR, \fB\-\-name=<name_list>\fR
+Request jobs or job steps having one of the specified names. The
+list consists of a comma separated list of job names.
.TP
\fB\-o <output_format>\fR, \fB\-\-format=<output_format>\fR
@@ -356,7 +357,11 @@
\fB\-s\fR, \fB\-\-steps\fR
Specify the job steps to view. This flag indicates that a comma separated list
of job steps to view follows without an equal sign (see examples).
-The job step format is "job_id.step_id". Defaults to all job steps.
+The job step format is "job_id.step_id". Defaults to all job
+steps. Since this option's argument is optional, for proper parsing
+the single letter option must be followed immediately with the value
+and not include a space between them. For example "\-s1008.0" and not
+"\-s 1008.0".
.TP
\fB\-S <sort_list>\fR, \fB\-\-sort=<sort_list>\fR
@@ -415,6 +420,13 @@
\fB\-V\fR , \fB\-\-version\fR
Print version information and exit.
+.TP
+\fB\-w <hostlist>\fR, \fB\-\-nodelist=<hostlist>\fR
+Report only on jobs allocated to the specified node or list of nodes.
+This may either be the \fBNodeName\fR or \fBNodeHostname\fR
+as defined in \fBslurm.conf(5)\fR in the event that they differ.
+A node_name of \fBlocalhost\fR is mapped to the current host name.
+
.SH "JOB REASON CODES"
These codes identify the reason that a job is waiting for execution.
A job may be waiting for more than one reason, in which case only
@@ -536,6 +548,9 @@
\fBSQUEUE_ALL\fR
\fB\-a, \-\-all\fR
.TP
+\fBSQUEUE_NAMES\fR
+\fB\-\-name=<name_list>\fR
+.TP
\fBSQUEUE_FORMAT\fR
\fB\-o <output_format>, \-\-format=<output_format>\fR
.TP
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index f5da084..fcc86de 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -1,4 +1,4 @@
-.TH "srun" "1" "SLURM 2.3" "August 2011" "SLURM Commands"
+.TH "srun" "1" "SLURM 2.4" "October 2011" "SLURM Commands"
.SH "NAME"
srun \- Run parallel jobs
@@ -10,8 +10,8 @@
Run a parallel job on cluster managed by SLURM. If necessary, srun will
first create a resource allocation in which to run the parallel job.
-The following document describes the the influence of various options on the
-allocation of cpus to jobs and tasks.
+The following document describes the influence of various options on the
+allocation of cpus to jobs and tasks.
.br
http://www.schedmd.com/slurmdocs/cpu_management.html
@@ -127,32 +127,57 @@
.TP
\fB\-C\fR, \fB\-\-constraint\fR=<\fIlist\fR>
-Specify a list of constraints.
-The constraints are features that have been assigned to the nodes by
-the slurm administrator.
-The \fIlist\fR of constraints may include multiple features separated
-by ampersand (AND) and/or vertical bar (OR) operators.
-For example: \fB\-\-constraint="opteron&video"\fR or
-\fB\-\-constraint="fast|faster"\fR.
-In the first example, only nodes having both the feature "opteron" AND
-the feature "video" will be used.
-There is no mechanism to specify that you want one node with feature
-"opteron" and another node with feature "video" in case no
-node has both features.
+Nodes can have \fBfeatures\fR assigned to them by the SLURM administrator.
+Users can specify which of these \fBfeatures\fR are required by their job
+using the constraint option.
+Only nodes having features matching the job constraints will be used to
+satisfy the request.
+Multiple constraints may be specified with AND, OR, exclusive OR,
+resource counts, etc.
+Supported \fBconstraint\fR options include:
+.PD 1
+.RS
+.TP
+\fBSingle Name\fR
+Only nodes which have the specified feature will be used.
+For example, \fB\-\-constraint="intel"\fR
+.TP
+\fBNode Count\fR
+A request can specify the number of nodes needed with some feature
+by appending an asterisk and count after the feature name.
+For example "\fB\-\-nodes=16 \-\-constraint=graphics*4 ..."\fR
+indicates that the job requires 16 nodes and that at least four of those
+nodes must have the feature "graphics."
+.TP
+\fBAND\fR
+Only nodes with all of the specified features will be used.
+The ampersand is used for an AND operator.
+For example, \fB\-\-constraint="intel&gpu"\fR
+.TP
+\fBOR\fR
+Only nodes with at least one of the specified features will be used.
+The vertical bar is used for an OR operator.
+For example, \fB\-\-constraint="intel|amd"\fR
+.TP
+\fBExclusive OR\fR
If only one of a set of possible options should be used for all allocated
nodes, then use the OR operator and enclose the options within square brackets.
For example: "\fB\-\-constraint=[rack1|rack2|rack3|rack4]"\fR might
be used to specify that all nodes must be allocated on a single rack of
the cluster, but any of those four racks can be used.
-A request can also specify the number of nodes needed with some feature
-by appending an asterisk and count after the feature name.
-For example "\fBsrun \-\-nodes=16 \-\-constraint=graphics*4 ..."\fR
-indicates that the job requires 16 nodes at that at least four of those
-nodes must have the feature "graphics."
-Constraints with node counts may only be combined with AND operators.
-If no nodes have the requested features, then the job will be rejected
-by the slurm job manager. This option is used for job allocations, but ignored
-for job step allocations.
+.TP
+\fBMultiple Counts\fR
+Specific counts of multiple resources may be specified by using the AND
+operator and enclosing the options within square brackets.
+For example: "\fB\-\-constraint=[rack1*2&rack2*4]"\fR might
+be used to specify that two nodes must be allocated from nodes with the feature
+of "rack1" and four nodes must be allocated from nodes with the feature
+"rack2".
+.RE
+
+\fBWARNING\fR: When srun is executed from within salloc or sbatch,
+the constraint value can only contain a single feature name. None of the
+other operators are currently supported for job steps.
.TP
\fB\-\-contiguous\fR
@@ -180,9 +205,9 @@
The following informational environment variables are set when \fB\-\-cpu_bind\fR
is in use:
.nf
- SLURM_CPU_BIND_VERBOSE
- SLURM_CPU_BIND_TYPE
- SLURM_CPU_BIND_LIST
+ SLURM_CPU_BIND_VERBOSE
+ SLURM_CPU_BIND_TYPE
+ SLURM_CPU_BIND_LIST
.fi
See the \fBENVIRONMENT VARIABLES\fR section for a more detailed description
@@ -314,7 +339,7 @@
threads per CPU for a total of two tasks.
\fBWARNING\fR: When srun is executed from within salloc or sbatch,
-there are configurations and options which can result in inconsistent
+there are configurations and options which can result in inconsistent
allocations when \-c has a value greater than \-c on salloc or sbatch.
.TP
\fB\-d\fR, \fB\-\-dependency\fR=<\fIdependency_list\fR>
@@ -468,7 +493,10 @@
If no argument is given, resources must be available immediately
for the request to succeed.
By default, \fB\-\-immediate\fR is off, and the command
-will block until resources become available.
+will block until resources become available. Since this option's
+argument is optional, for proper parsing the single letter option
+must be followed immediately with the value and not include a
+space between them. For example "\-I60" and not "\-I 60".
.TP
\fB\-i\fR, \fB\-\-input\fR=<\fImode\fR>
@@ -483,10 +511,10 @@
\fB\-J\fR, \fB\-\-job\-name\fR=<\fIjobname\fR>
Specify a name for the job. The specified name will appear along with
the job id number when querying running jobs on the system. The default
-is the supplied \fBexecutable\fR program's name. NOTE: This information
+is the supplied \fBexecutable\fR program's name. NOTE: This information
may be written to the slurm_jobacct.log file. This file is space delimited
-so if a space is used in the \fIjobname\fR name it will cause problems in
-properly displaying the contents of the slurm_jobacct.log file when the
+so if a space is used in the \fIjobname\fR name it will cause problems in
+properly displaying the contents of the slurm_jobacct.log file when the
\fBsacct\fR command is used.
.TP
@@ -505,18 +533,21 @@
argument will terminate the job.
Note: This option takes precedence over the \fB\-W\fR, \fB\-\-wait\fR option
to terminate the job immediately if a task exits with a non\-zero exit code.
+Since this option's argument is optional, for proper parsing the
+single letter option must be followed immediately with the value and
+not include a space between them. For example "\-K1" and not "\-K 1".
.TP
\fB\-k\fR, \fB\-\-no\-kill\fR
Do not automatically terminate a job of one of the nodes it has been
allocated fails. This option is only recognized on a job allocation,
not for the submission of individual job steps.
-The job will assume all responsibilities for fault\-tolerance.
+The job will assume all responsibilities for fault\-tolerance.
Tasks launch using this option will not be considered terminated
(e.g. \fB\-K\fR, \fB\-\-kill\-on\-bad\-exit\fR and
\fB\-W\fR, \fB\-\-wait\fR options will have no effect upon the job step).
The active job step (MPI job) will likely suffer a fatal error,
-but subsequent job steps may be run if this option is specified.
+but subsequent job steps may be run if this option is specified.
The default action is to terminate the job upon node failure.
.TP
@@ -600,8 +631,13 @@
requested and be one per line or comma separated. If specifying a
task count (\fB\-n\fR, \fB\-\-ntasks\fR=<\fInumber\fR>), your tasks
will be laid out on the nodes in the order of the file.
-.TP
+.br
+\fBNOTE:\fR The arbitrary distribution option on a job allocation only
+controls the nodes to be allocated to the job and not the allocation of
+CPUs on those nodes. This option is meant primarily to control a job step's
+task layout in an existing job allocation for the srun command.
+.TP
Second distribution method:
.TP
.B block
@@ -636,6 +672,10 @@
are allocated to jobs (\fBSelectType=select/linear\fR).
Also see \fB\-\-mem\-per\-cpu\fR.
\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
+NOTE: Enforcement of memory limits currently requires enabling of
+accounting, which samples memory use on a periodic basis (data need
+not be stored, just collected). A task may exceed the memory limit
+until the next periodic accounting sample.
.TP
\fB\-\-mem\-per\-cpu\fR=<\fIMB\fR>
@@ -646,7 +686,7 @@
Note that if the job's \fB\-\-mem\-per\-cpu\fR value exceeds the configured
\fBMaxMemPerCPU\fR, then the user's limit will be treated as a memory limit
per task; \fB\-\-mem\-per\-cpu\fR will be reduced to a value no larger than
-\fBMaxMemPerCPU\fR; \fB\-\-cpus\-per\-task\fR will be set and value of
+\fBMaxMemPerCPU\fR; \fB\-\-cpus\-per\-task\fR will be set and value of
\fB\-\-cpus\-per\-task\fR multiplied by the new \fB\-\-mem\-per\-cpu\fR
value will equal the original \fB\-\-mem\-per\-cpu\fR value specified by
the user.
@@ -677,9 +717,9 @@
\fB\-\-mem_bind\fR is in use:
.nf
- SLURM_MEM_BIND_VERBOSE
- SLURM_MEM_BIND_TYPE
- SLURM_MEM_BIND_LIST
+ SLURM_MEM_BIND_VERBOSE
+ SLURM_MEM_BIND_TYPE
+ SLURM_MEM_BIND_LIST
.fi
See the \fBENVIRONMENT VARIABLES\fR section for a more detailed description
@@ -873,9 +913,9 @@
Overcommit resources. Normally, \fBsrun\fR
will not allocate more than one process per CPU. By specifying
\fB\-\-overcommit\fR you are explicitly allowing more than one process
-per CPU. However no more than \fBMAX_TASKS_PER_NODE\fR tasks are
-permitted to execute per node. NOTE: \fBMAX_TASKS_PER_NODE\fR is
-defined in the file \fIslurm.h\fR and is not a variable, it is set at
+per CPU. However no more than \fBMAX_TASKS_PER_NODE\fR tasks are
+permitted to execute per node. NOTE: \fBMAX_TASKS_PER_NODE\fR is
+defined in the file \fIslurm.h\fR and is not a variable, it is set at
SLURM build time.
.TP
@@ -941,7 +981,7 @@
.TP
\fBFSIZE\fR
The maximum size of files created. Note that if the user sets FSIZE to less
-than the current size of the slurmd.log, job launches will fail with
+than the current size of the slurmd.log, job launches will fail with
a 'File size limit exceeded' error.
.TP
\fBMEMLOCK\fR
@@ -965,7 +1005,7 @@
Execute task zero in pseudo terminal mode.
Implicitly sets \fB\-\-unbuffered\fR.
Implicitly sets \fB\-\-error\fR and \fB\-\-output\fR to /dev/null
-for all tasks except task zero, which may cause those tasks to
+for all tasks except task zero, which may cause those tasks to
exit immediately (e.g. shells will typically exit immediately
in that situation).
Not currently supported on AIX platforms.
@@ -996,7 +1036,7 @@
nodes of the current job. If \fB\-r\fR is used, the current job
step will begin at node \fIn\fR of the allocated nodelist, where
the first node is considered node 0. The \fB\-r\fR option is not
-permitted with \fB\-w\fR or \fB\-x\fR option and will result in a
+permitted with \fB\-w\fR or \fB\-x\fR option and will result in a
fatal error when not running within a prior allocation (i.e. when
SLURM_JOB_ID is not set). The default for \fIn\fR is 0. If the
value of \fB\-\-nodes\fR exceeds the number of nodes identified
@@ -1018,6 +1058,11 @@
be read (used by the checkpoint/blcrm and checkpoint/xlch plugins only).
.TP
+\fB\-\-runjob\-opts\fR=<\fIoptions\fR>
+Options to the runjob command.
+For use only on IBM BlueGene/Q systems.
+
+.TP
\fB\-s\fR, \fB\-\-share\fR
The job allocation can share nodes with other running jobs.
This is the opposite of \-\-exclusive, whichever option is seen last
@@ -1059,7 +1104,7 @@
desired for the job allocation and optionally the maximum time to wait
for that number of switches. If SLURM finds an allocation containing more
switches than the count specified, the job remains pending until it either finds
-an allocation with desired switch count or the time limit expires.
+an allocation with desired switch count or the time limit expires.
It there is no switch count limit, there is no delay in starting the job.
Acceptable time formats include "minutes", "minutes:seconds",
"hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and
@@ -1087,8 +1132,8 @@
the job will be left in a PENDING state (possibly indefinitely).
If the requested time limit for a job step exceeds the partition's
time limit, the job step will not be initiated. The default time
-limit is the partition's time limit. When the time limit is reached,
-the job's tasks are sent SIGTERM followed by SIGKILL. If the time
+limit is the partition's default time limit. When the time limit is reached,
+each task in each job step is sent SIGTERM followed by SIGKILL. If the time
limit is for the job, all job steps are signaled. If the time limit is
for a single job step within an existing job allocation, only that job
step will be affected. A job time limit supercedes all job step time
@@ -1138,12 +1183,12 @@
Set a minimum time limit on the job allocation.
If specified, the job may have it's \fB\-\-time\fR limit lowered to a value
no lower than \fB\-\-time\-min\fR if doing so permits the job to begin
-execution earlier than otherwise possible.
+execution earlier than otherwise possible.
The job's time limit will not be changed after the job is allocated resources.
-This is performed by a backfill scheduling algorithm to allocate resources
+This is performed by a backfill scheduling algorithm to allocate resources
otherwise reserved for higher priority jobs.
-Acceptable time formats include "minutes", "minutes:seconds",
-"hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and
+Acceptable time formats include "minutes", "minutes:seconds",
+"hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and
"days\-hours:minutes:seconds".
.TP
@@ -1188,7 +1233,7 @@
option can be useful to insure that a job is terminated in a timely fashion
in the event that one or more tasks terminate prematurely.
Note: The \fB\-K\fR, \fB\-\-kill\-on\-bad\-exit\fR option takes precedence
-over \fB\-W\fR, \fB\-\-wait\fR to terminate the job immediately if a task
+over \fB\-W\fR, \fB\-\-wait\fR to terminate the job immediately if a task
exits with a non\-zero exit code.
.TP
@@ -1224,8 +1269,8 @@
\fB\-Z\fR, \fB\-\-no\-allocate\fR
Run the specified tasks on a set of nodes without creating a SLURM
"job" in the SLURM queue structure, bypassing the normal resource
-allocation step. The list of nodes must be specified with the
-\fB\-w\fR, \fB\-\-nodelist\fR option. This is a privileged option
+allocation step. The list of nodes must be specified with the
+\fB\-w\fR, \fB\-\-nodelist\fR option. This is a privileged option
only available for the users "SlurmUser" and "root".
.PP
@@ -1244,23 +1289,28 @@
.TP
\fB\-\-conn\-type\fR=<\fItype\fR>
-Require the partition connection type to be of a certain type.
+Require the block connection type to be of a certain type.
On Blue Gene the acceptable of \fItype\fR are MESH, TORUS and NAV.
-If NAV, or if not set, then SLURM will try to fit a TORUS else MESH.
+If NAV, or if not set, then SLURM will try to fit what the
+DefaultConnType is set to in the bluegene.conf; if that isn't set, the
+default is TORUS.
You should not normally set this option.
-SLURM will normally allocate a TORUS if possible for a given geometry.
If running on a BGP system and wanting to run in HTC mode (only for 1
midplane and below). You can use HTC_S for SMP, HTC_D for Dual, HTC_V
for virtual node mode, and HTC_L for Linux mode.
-A comma separated lists of connection types may be specified, one for each dimension.
+For systems that allow a different connection type per dimension you
+can supply a comma separated list of connection types, one for
+each dimension (i.e. M,T,T,T will give you a torus connection in all
+dimensions except the first).
.TP
\fB\-g\fR, \fB\-\-geometry\fR=<\fIXxYxZ\fR>
Specify the geometry requirements for the job. The three numbers
represent the required geometry giving dimensions in the X, Y and
Z directions. For example "\-\-geometry=2x3x4", specifies a block
-of nodes having 2 x 3 x 4 = 24 nodes (actually base partitions on
-Blue Gene).
+of nodes having 2 x 3 x 4 = 24 nodes (actually midplanes on
+Blue Gene). On a BGQ system the dimensions are AxXxYxZ, and can not
+be used to allocation sub-blocks.
.TP
\fB\-\-ioload\-image\fR=<\fIpath\fR>
@@ -1346,8 +1396,15 @@
If stdin is only to be read by a subset of the spawned tasks, specifying a
file to read from rather than forwarding stdin from the \fBsrun\fR command may
be preferable as it avoids moving and storing data that will never be read.
+.PP
For OS X, the poll() function does not support stdin, so input from
a terminal is not possible.
+.PP
+For BGQ srun only supports stdin to 1 task running on the system. By
+default it is taskid 0 but can be changed with the \-i<taskid> as
+described below, or \-\-runjob\-opts="\-\-stdinrank=<taskid>". Outside
+of the taskid option none of the options below are available for a BGQ system.
+.PP
This behavior may be changed with the
\fB\-\-output\fR, \fB\-\-error\fR, and \fB\-\-input\fR
(\fB\-o\fR, \fB\-e\fR, \fB\-i\fR) options. Valid format specifications
@@ -1362,14 +1419,14 @@
stdout and stderr is not received from any task.
stdin is not sent to any task (stdin is closed).
.TP
-\fItaskid\fR
+\fBtaskid\fR
stdout and/or stderr are redirected from only the task with relative
id equal to \fItaskid\fR, where 0 <= \fItaskid\fR <= \fIntasks\fR,
where \fIntasks\fR is the total number of tasks in the current job step.
stdin is redirected from the stdin of \fBsrun\fR to this same task.
This file will be written on the node executing the task.
.TP
-\fIfilename\fR
+\fBfilename\fR
\fBsrun\fR will redirect stdout and/or stderr to the named file from
all tasks.
stdin will be redirected from the named file and broadcast to all
@@ -1378,7 +1435,7 @@
this may result in the output appearing in different places depending
on whether the job is run in batch mode.
.TP
-format string
+\fBformat string\fR
\fBsrun\fR allows for a format string to be used to generate the
named IO file
described above. The following list of format specifiers may be
@@ -1716,6 +1773,12 @@
\fBSLURM_NNODES\fR
Total number of nodes in the job's resource allocation
.TP
+\fBSLURM_NODE_ALIASES\fR
+Sets of node name, communication address and hostname for nodes allocated to
+the job from the cloud. Each element in the set if colon separated and each
+set is comma separated. For example:
+SLURM_NODE_ALIASES=ec0:1.2.3.4:foo,ec1:1.2.3.5:bar
+.TP
\fBSLURM_NODEID\fR
The relative node ID of the current node
.TP
@@ -1842,6 +1905,8 @@
have offset values of "0\-4").
Single quotes may be used to avoid having the enclosed values interpreted.
This field is optional.
+Any arguments for the program entered on the command line will be added
+to the arguments specified in the configuration file.
.PP
For example:
.nf
diff --git a/doc/man/man1/sshare.1 b/doc/man/man1/sshare.1
index bc90ff7..3d07885 100644
--- a/doc/man/man1/sshare.1
+++ b/doc/man/man1/sshare.1
@@ -1,4 +1,4 @@
-.TH SSHARE "1" "November 2008" "sshare 2.0" "SLURM Commands"
+.TH SSHARE "1" "March 2012" "sshare 2.0" "SLURM Commands"
.SH "NAME"
sshare \- Tool for listing the shares of associations to a cluster.
@@ -14,7 +14,8 @@
read in from the slurmctld and used to process the shares available
to a given association. sshare provides SLURM share information of
Account, User, Raw Shares, Normalized Shares, Raw Usage, Normalized
-Usage, Effective Usage, and the Fair-share factor for each association.
+Usage, Effective Usage, the Fair-share factor, the GrpCPUMins limit
+and accumulated currently running CPU-minutes for each association.
.SH "OPTIONS"
@@ -105,6 +106,15 @@
The Fair-Share factor, based on a user or account's assigned shares and
the effective usage charged to them or their accounts.
+.TP
+\f3GrpCPUMins\fP
+The CPU-minutes limit set on the account.
+
+.TP
+\f3CPURunMins\fP
+The number of CPU-minutes accumulated by jobs currently running against
+the account.
+
.SH "EXAMPLES"
.eo
.br
diff --git a/doc/man/man1/strigger.1 b/doc/man/man1/strigger.1
index 1949d3e..86452ab 100644
--- a/doc/man/man1/strigger.1
+++ b/doc/man/man1/strigger.1
@@ -1,4 +1,4 @@
-.TH STRIGGER "1" "December 2010" "strigger 2.3" "Slurm components"
+.TH STRIGGER "1" "September 2011" "strigger 2.4" "Slurm components"
.SH "NAME"
strigger \- Used set, get or clear Slurm trigger information.
@@ -30,14 +30,15 @@
The record of those events (e.g. nodes which went DOWN in the previous
15 seconds) will then be cleared.
The trigger program must set a new trigger before the end of the next
-interval to insure that no trigger events are missed.
+interval to insure that no trigger events are missed OR the trigger must be
+created with an argument of "\-\-flags=PERM".
If desired, multiple trigger programs can be set for the same event.
\fBIMPORTANT NOTE:\fR This command can only set triggers if run by the
user \fISlurmUser\fR unless \fISlurmUser\fR is configured as user root.
This is required for the \fIslurmctld\fR daemon to set the appropriate
user and group IDs for the executed program.
-Also note that the program is executed on the same node that the
+Also note that the trigger program is executed on the same node that the
\fIslurmctld\fR daemon uses rather than some allocated compute node.
To check the value of \fISlurmUser\fR, run the command:
@@ -75,9 +76,10 @@
.TP
\fB\-\-clear\fP
Clear or delete a previously defined event trigger.
-The \fB\-\-id\fR, \fB\-\-jobid\fR or \fB\-\-userid\fR
+The \fB\-\-id\fR, \fB\-\-jobid\fR or \fB\-\-user\fR
option must be specified to identify the trigger(s) to
be cleared.
+Only user root or the trigger's creator can delete a trigger.
.TP
\fB\-d\fR, \fB\-\-down\fR
@@ -100,6 +102,16 @@
Trigger an event when the specified job completes execution.
.TP
+\fB\-\-flags\fR=\fItype\fR
+Associate flags with the trigger. Multiple flags should be comma separated.
+Valid flags include:
+.RS
+.TP
+PERM
+Make the trigger permanent. Do not purge it after the event occurs.
+.RE
+
+.TP
\fB\-\-front_end\fR
Trigger events based upon changes in state of front end nodes rather than
compute nodes. Applies to BlueGene and Cray architectures only, where the
@@ -157,7 +169,10 @@
used in conjunction with the \fB\-\-up\fR, \fB\-\-down\fR or
\fB\-\-drained\fR option,
all nodes allocated to that job will considered the nodes used as a
-trigger event.
+trigger event. Since this option's argument is optional, for proper
+parsing the single letter option must be followed immediately with
+the value and not include a space between them. For example "\-ntux"
+and not "\-n tux".
.TP
\fB\-M\fR, \fB\-\-clusters\fR=<\fIstring\fR>
@@ -195,6 +210,8 @@
NOTE: An event is only triggered once. A new event trigger
must be set established for future events of the same type
to be processed.
+Triggers can only be set if the command is run by the user
+\fISlurmUser\fR unless \fISlurmUser\fR is configured as user root.
.TP
\fB\-t\fR, \fB\-\-time\fR
@@ -208,7 +225,9 @@
.TP
\fB\-\-user\fR=\fIuser_name_or_id\fR
-Clear or get triggers associated with the specified user.
+Clear or get triggers created by the specified user.
+For example, a trigger created by user \fIroot\fR for a job created by user
+\fIadam\fR could be cleared with an option \fI\-\-user=root\fR.
Specify either a user name or user ID.
.TP
diff --git a/doc/man/man2html.py b/doc/man/man2html.py
index a7dda8d..cc447db 100755
--- a/doc/man/man2html.py
+++ b/doc/man/man2html.py
@@ -160,8 +160,7 @@
for filename in files:
dirname, basefilename = os.path.split(filename)
-# newfilename = basefilename[:-6] + '.html'
- newfilename = filename[:-6] + '.html'
+ newfilename = basefilename[:-6] + '.html'
print 'Converting', filename, '->', newfilename
shtml = file(filename, 'r')
html = file(newfilename, 'w')
diff --git a/doc/man/man3/Makefile.am b/doc/man/man3/Makefile.am
new file mode 100644
index 0000000..a2ec99a
--- /dev/null
+++ b/doc/man/man3/Makefile.am
@@ -0,0 +1,133 @@
+man3_MANS = slurm_hostlist_create.3 \
+ slurm_hostlist_destroy.3 \
+ slurm_hostlist_shift.3 \
+ slurm_allocate_resources.3 \
+ slurm_allocate_resources_blocking.3 \
+ slurm_allocation_lookup.3 \
+ slurm_allocation_lookup_lite.3 \
+ slurm_allocation_msg_thr_create.3 \
+ slurm_allocation_msg_thr_destroy.3 \
+ slurm_api_version.3 \
+ slurm_checkpoint.3 \
+ slurm_checkpoint_able.3 \
+ slurm_checkpoint_complete.3 \
+ slurm_checkpoint_create.3 \
+ slurm_checkpoint_disable.3 \
+ slurm_checkpoint_enable.3 \
+ slurm_checkpoint_error.3 \
+ slurm_checkpoint_failed.3 \
+ slurm_checkpoint_restart.3 \
+ slurm_checkpoint_task_complete.3 \
+ slurm_checkpoint_tasks.3 \
+ slurm_checkpoint_vacate.3 \
+ slurm_clear_trigger.3 \
+ slurm_complete_job.3 \
+ slurm_confirm_allocation.3 \
+ slurm_create_partition.3 \
+ slurm_create_reservation.3 \
+ slurm_delete_partition.3 \
+ slurm_delete_reservation.3 \
+ slurm_free_ctl_conf.3 \
+ slurm_free_front_end_info_msg.3 \
+ slurm_free_job_info_msg.3 \
+ slurm_free_job_alloc_info_response_msg.3 \
+ slurm_free_job_step_create_response_msg.3 \
+ slurm_free_job_step_info_response_msg.3 \
+ slurm_free_node_info.3 \
+ slurm_free_node_info_msg.3 \
+ slurm_free_partition_info.3 \
+ slurm_free_partition_info_msg.3 \
+ slurm_free_reservation_info_msg.3 \
+ slurm_free_resource_allocation_response_msg.3 \
+ slurm_free_slurmd_status.3 \
+ slurm_free_submit_response_response_msg.3 \
+ slurm_free_trigger_msg.3 \
+ slurm_get_end_time.3 \
+ slurm_get_errno.3 \
+ slurm_get_job_steps.3 \
+ slurm_get_rem_time.3 \
+ slurm_get_select_jobinfo.3 \
+ slurm_get_triggers.3 \
+ slurm_init_update_front_end_msg.3 \
+ slurm_init_job_desc_msg.3 \
+ slurm_init_part_desc_msg.3 \
+ slurm_init_resv_desc_msg.3 \
+ slurm_init_update_node_msg.3 \
+ slurm_init_update_step_msg.3 \
+ slurm_job_cpus_allocated_on_node.3 \
+ slurm_job_cpus_allocated_on_node_id.3 \
+ slurm_job_step_create.3 \
+ slurm_job_step_launch_t_init.3 \
+ slurm_job_step_layout_get.3 \
+ slurm_job_step_layout_free.3 \
+ slurm_job_will_run.3 \
+ slurm_jobinfo_ctx_get.3 \
+ slurm_kill_job.3 \
+ slurm_kill_job_step.3 \
+ slurm_load_ctl_conf.3 \
+ slurm_load_front_end.3 \
+ slurm_load_job.3 \
+ slurm_load_jobs.3 \
+ slurm_load_node.3 \
+ slurm_load_partitions.3 \
+ slurm_load_reservations.3 \
+ slurm_load_slurmd_status.3 \
+ slurm_notify_job.3 \
+ slurm_perror.3 \
+ slurm_pid2jobid.3 \
+ slurm_ping.3 \
+ slurm_print_ctl_conf.3 \
+ slurm_print_front_end_info_msg.3 \
+ slurm_print_front_end_table.3 \
+ slurm_print_job_info.3 \
+ slurm_print_job_info_msg.3 \
+ slurm_print_job_step_info.3 \
+ slurm_print_job_step_info_msg.3 \
+ slurm_print_node_info_msg.3 \
+ slurm_print_node_table.3 \
+ slurm_print_partition_info.3 \
+ slurm_print_partition_info_msg.3 \
+ slurm_print_reservation_info.3 \
+ slurm_print_reservation_info_msg.3 \
+ slurm_print_slurmd_status.3 \
+ slurm_read_hostfile.3 \
+ slurm_reconfigure.3 \
+ slurm_resume.3 \
+ slurm_requeue.3 \
+ slurm_set_debug_level.3 \
+ slurm_set_trigger.3 \
+ slurm_shutdown.3 \
+ slurm_signal_job.3 \
+ slurm_signal_job_step.3 \
+ slurm_slurmd_status.3 \
+ slurm_sprint_front_end_table.3 \
+ slurm_sprint_job_info.3 \
+ slurm_sprint_job_step_info.3 \
+ slurm_sprint_node_table.3 \
+ slurm_sprint_partition_info.3 \
+ slurm_sprint_reservation_info.3 \
+ slurm_step_ctx_create.3 \
+ slurm_step_ctx_create_no_alloc.3 \
+ slurm_step_ctx_daemon_per_node_hack.3 \
+ slurm_step_ctx_destroy.3 \
+ slurm_step_ctx_params_t_init.3 \
+ slurm_step_ctx_get.3 \
+ slurm_step_launch.3 \
+ slurm_step_launch_fwd_signal.3 \
+ slurm_step_launch_abort.3 \
+ slurm_step_launch_wait_finish.3 \
+ slurm_step_launch_wait_start.3 \
+ slurm_strerror.3 \
+ slurm_submit_batch_job.3 \
+ slurm_suspend.3 \
+ slurm_takeover.3 \
+ slurm_terminate_job.3 \
+ slurm_terminate_job_step.3 \
+ slurm_update_front_end.3 \
+ slurm_update_job.3 \
+ slurm_update_node.3 \
+ slurm_update_partition.3 \
+ slurm_update_reservation.3 \
+ slurm_update_step.3
+
+EXTRA_DIST = $(man3_MANS)
diff --git a/doc/man/man3/Makefile.in b/doc/man/man3/Makefile.in
new file mode 100644
index 0000000..b528a35
--- /dev/null
+++ b/doc/man/man3/Makefile.in
@@ -0,0 +1,696 @@
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = doc/man/man3
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
+ $(top_srcdir)/auxdir/libtool.m4 \
+ $(top_srcdir)/auxdir/ltoptions.m4 \
+ $(top_srcdir)/auxdir/ltsugar.m4 \
+ $(top_srcdir)/auxdir/ltversion.m4 \
+ $(top_srcdir)/auxdir/lt~obsolete.m4 \
+ $(top_srcdir)/auxdir/slurm.m4 \
+ $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \
+ $(top_srcdir)/auxdir/x_ac_affinity.m4 \
+ $(top_srcdir)/auxdir/x_ac_aix.m4 \
+ $(top_srcdir)/auxdir/x_ac_blcr.m4 \
+ $(top_srcdir)/auxdir/x_ac_bluegene.m4 \
+ $(top_srcdir)/auxdir/x_ac_cflags.m4 \
+ $(top_srcdir)/auxdir/x_ac_cray.m4 \
+ $(top_srcdir)/auxdir/x_ac_databases.m4 \
+ $(top_srcdir)/auxdir/x_ac_debug.m4 \
+ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \
+ $(top_srcdir)/auxdir/x_ac_elan.m4 \
+ $(top_srcdir)/auxdir/x_ac_env.m4 \
+ $(top_srcdir)/auxdir/x_ac_federation.m4 \
+ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \
+ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \
+ $(top_srcdir)/auxdir/x_ac_iso.m4 \
+ $(top_srcdir)/auxdir/x_ac_lua.m4 \
+ $(top_srcdir)/auxdir/x_ac_man2html.m4 \
+ $(top_srcdir)/auxdir/x_ac_munge.m4 \
+ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \
+ $(top_srcdir)/auxdir/x_ac_pam.m4 \
+ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \
+ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \
+ $(top_srcdir)/auxdir/x_ac_readline.m4 \
+ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \
+ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
+ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \
+ $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \
+ $(top_srcdir)/auxdir/x_ac_srun.m4 \
+ $(top_srcdir)/auxdir/x_ac_sun_const.m4 \
+ $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+man3dir = $(mandir)/man3
+am__installdirs = "$(DESTDIR)$(man3dir)"
+NROFF = nroff
+MANS = $(man3_MANS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTHD_CFLAGS = @AUTHD_CFLAGS@
+AUTHD_LIBS = @AUTHD_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BGL_LOADED = @BGL_LOADED@
+BGQ_LOADED = @BGQ_LOADED@
+BG_INCLUDES = @BG_INCLUDES@
+BG_LDFLAGS = @BG_LDFLAGS@
+BG_L_P_LOADED = @BG_L_P_LOADED@
+BLCR_CPPFLAGS = @BLCR_CPPFLAGS@
+BLCR_HOME = @BLCR_HOME@
+BLCR_LDFLAGS = @BLCR_LDFLAGS@
+BLCR_LIBS = @BLCR_LIBS@
+BLUEGENE_LOADED = @BLUEGENE_LOADED@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CMD_LDFLAGS = @CMD_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DL_LIBS = @DL_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ELAN_LIBS = @ELAN_LIBS@
+EXEEXT = @EXEEXT@
+FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@
+FGREP = @FGREP@
+GREP = @GREP@
+GTK_CFLAGS = @GTK_CFLAGS@
+GTK_LIBS = @GTK_LIBS@
+HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@
+HAVEPGCONFIG = @HAVEPGCONFIG@
+HAVE_AIX = @HAVE_AIX@
+HAVE_ELAN = @HAVE_ELAN@
+HAVE_FEDERATION = @HAVE_FEDERATION@
+HAVE_MAN2HTML = @HAVE_MAN2HTML@
+HAVE_OPENSSL = @HAVE_OPENSSL@
+HAVE_SOME_CURSES = @HAVE_SOME_CURSES@
+HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@
+HWLOC_LDFLAGS = @HWLOC_LDFLAGS@
+HWLOC_LIBS = @HWLOC_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_LDFLAGS = @LIB_LDFLAGS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
+MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
+MUNGE_LIBS = @MUNGE_LIBS@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_LIBS = @MYSQL_LIBS@
+NCURSES = @NCURSES@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NUMA_LIBS = @NUMA_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PAM_DIR = @PAM_DIR@
+PAM_LIBS = @PAM_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PROCTRACKDIR = @PROCTRACKDIR@
+PROJECT = @PROJECT@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+READLINE_LIBS = @READLINE_LIBS@
+REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
+RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
+SED = @SED@
+SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SLURMCTLD_PORT = @SLURMCTLD_PORT@
+SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@
+SLURMDBD_PORT = @SLURMDBD_PORT@
+SLURMD_PORT = @SLURMD_PORT@
+SLURM_API_AGE = @SLURM_API_AGE@
+SLURM_API_CURRENT = @SLURM_API_CURRENT@
+SLURM_API_MAJOR = @SLURM_API_MAJOR@
+SLURM_API_REVISION = @SLURM_API_REVISION@
+SLURM_API_VERSION = @SLURM_API_VERSION@
+SLURM_MAJOR = @SLURM_MAJOR@
+SLURM_MICRO = @SLURM_MICRO@
+SLURM_MINOR = @SLURM_MINOR@
+SLURM_PREFIX = @SLURM_PREFIX@
+SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@
+SLURM_VERSION_STRING = @SLURM_VERSION_STRING@
+SO_LDFLAGS = @SO_LDFLAGS@
+SSL_CPPFLAGS = @SSL_CPPFLAGS@
+SSL_LDFLAGS = @SSL_LDFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+UTIL_LIBS = @UTIL_LIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+ac_have_man2html = @ac_have_man2html@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lua_CFLAGS = @lua_CFLAGS@
+lua_LIBS = @lua_LIBS@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+man3_MANS = slurm_hostlist_create.3 \
+ slurm_hostlist_destroy.3 \
+ slurm_hostlist_shift.3 \
+ slurm_allocate_resources.3 \
+ slurm_allocate_resources_blocking.3 \
+ slurm_allocation_lookup.3 \
+ slurm_allocation_lookup_lite.3 \
+ slurm_allocation_msg_thr_create.3 \
+ slurm_allocation_msg_thr_destroy.3 \
+ slurm_api_version.3 \
+ slurm_checkpoint.3 \
+ slurm_checkpoint_able.3 \
+ slurm_checkpoint_complete.3 \
+ slurm_checkpoint_create.3 \
+ slurm_checkpoint_disable.3 \
+ slurm_checkpoint_enable.3 \
+ slurm_checkpoint_error.3 \
+ slurm_checkpoint_failed.3 \
+ slurm_checkpoint_restart.3 \
+ slurm_checkpoint_task_complete.3 \
+ slurm_checkpoint_tasks.3 \
+ slurm_checkpoint_vacate.3 \
+ slurm_clear_trigger.3 \
+ slurm_complete_job.3 \
+ slurm_confirm_allocation.3 \
+ slurm_create_partition.3 \
+ slurm_create_reservation.3 \
+ slurm_delete_partition.3 \
+ slurm_delete_reservation.3 \
+ slurm_free_ctl_conf.3 \
+ slurm_free_front_end_info_msg.3 \
+ slurm_free_job_info_msg.3 \
+ slurm_free_job_alloc_info_response_msg.3 \
+ slurm_free_job_step_create_response_msg.3 \
+ slurm_free_job_step_info_response_msg.3 \
+ slurm_free_node_info.3 \
+ slurm_free_node_info_msg.3 \
+ slurm_free_partition_info.3 \
+ slurm_free_partition_info_msg.3 \
+ slurm_free_reservation_info_msg.3 \
+ slurm_free_resource_allocation_response_msg.3 \
+ slurm_free_slurmd_status.3 \
+ slurm_free_submit_response_response_msg.3 \
+ slurm_free_trigger_msg.3 \
+ slurm_get_end_time.3 \
+ slurm_get_errno.3 \
+ slurm_get_job_steps.3 \
+ slurm_get_rem_time.3 \
+ slurm_get_select_jobinfo.3 \
+ slurm_get_triggers.3 \
+ slurm_init_update_front_end_msg.3 \
+ slurm_init_job_desc_msg.3 \
+ slurm_init_part_desc_msg.3 \
+ slurm_init_resv_desc_msg.3 \
+ slurm_init_update_node_msg.3 \
+ slurm_init_update_step_msg.3 \
+ slurm_job_cpus_allocated_on_node.3 \
+ slurm_job_cpus_allocated_on_node_id.3 \
+ slurm_job_step_create.3 \
+ slurm_job_step_launch_t_init.3 \
+ slurm_job_step_layout_get.3 \
+ slurm_job_step_layout_free.3 \
+ slurm_job_will_run.3 \
+ slurm_jobinfo_ctx_get.3 \
+ slurm_kill_job.3 \
+ slurm_kill_job_step.3 \
+ slurm_load_ctl_conf.3 \
+ slurm_load_front_end.3 \
+ slurm_load_job.3 \
+ slurm_load_jobs.3 \
+ slurm_load_node.3 \
+ slurm_load_partitions.3 \
+ slurm_load_reservations.3 \
+ slurm_load_slurmd_status.3 \
+ slurm_notify_job.3 \
+ slurm_perror.3 \
+ slurm_pid2jobid.3 \
+ slurm_ping.3 \
+ slurm_print_ctl_conf.3 \
+ slurm_print_front_end_info_msg.3 \
+ slurm_print_front_end_table.3 \
+ slurm_print_job_info.3 \
+ slurm_print_job_info_msg.3 \
+ slurm_print_job_step_info.3 \
+ slurm_print_job_step_info_msg.3 \
+ slurm_print_node_info_msg.3 \
+ slurm_print_node_table.3 \
+ slurm_print_partition_info.3 \
+ slurm_print_partition_info_msg.3 \
+ slurm_print_reservation_info.3 \
+ slurm_print_reservation_info_msg.3 \
+ slurm_print_slurmd_status.3 \
+ slurm_read_hostfile.3 \
+ slurm_reconfigure.3 \
+ slurm_resume.3 \
+ slurm_requeue.3 \
+ slurm_set_debug_level.3 \
+ slurm_set_trigger.3 \
+ slurm_shutdown.3 \
+ slurm_signal_job.3 \
+ slurm_signal_job_step.3 \
+ slurm_slurmd_status.3 \
+ slurm_sprint_front_end_table.3 \
+ slurm_sprint_job_info.3 \
+ slurm_sprint_job_step_info.3 \
+ slurm_sprint_node_table.3 \
+ slurm_sprint_partition_info.3 \
+ slurm_sprint_reservation_info.3 \
+ slurm_step_ctx_create.3 \
+ slurm_step_ctx_create_no_alloc.3 \
+ slurm_step_ctx_daemon_per_node_hack.3 \
+ slurm_step_ctx_destroy.3 \
+ slurm_step_ctx_params_t_init.3 \
+ slurm_step_ctx_get.3 \
+ slurm_step_launch.3 \
+ slurm_step_launch_fwd_signal.3 \
+ slurm_step_launch_abort.3 \
+ slurm_step_launch_wait_finish.3 \
+ slurm_step_launch_wait_start.3 \
+ slurm_strerror.3 \
+ slurm_submit_batch_job.3 \
+ slurm_suspend.3 \
+ slurm_takeover.3 \
+ slurm_terminate_job.3 \
+ slurm_terminate_job_step.3 \
+ slurm_update_front_end.3 \
+ slurm_update_job.3 \
+ slurm_update_node.3 \
+ slurm_update_partition.3 \
+ slurm_update_reservation.3 \
+ slurm_update_step.3
+
+EXTRA_DIST = $(man3_MANS)
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu doc/man/man3/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu doc/man/man3/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-man3: $(man3_MANS)
+ @$(NORMAL_INSTALL)
+ test -z "$(man3dir)" || $(MKDIR_P) "$(DESTDIR)$(man3dir)"
+ @list='$(man3_MANS)'; test -n "$(man3dir)" || exit 0; \
+ { for i in $$list; do echo "$$i"; done; \
+ } | while read p; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; echo "$$p"; \
+ done | \
+ sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+ sed 'N;N;s,\n, ,g' | { \
+ list=; while read file base inst; do \
+ if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+ echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man3dir)/$$inst'"; \
+ $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man3dir)/$$inst" || exit $$?; \
+ fi; \
+ done; \
+ for i in $$list; do echo "$$i"; done | $(am__base_list) | \
+ while read files; do \
+ test -z "$$files" || { \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man3dir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(man3dir)" || exit $$?; }; \
+ done; }
+
+uninstall-man3:
+ @$(NORMAL_UNINSTALL)
+ @list='$(man3_MANS)'; test -n "$(man3dir)" || exit 0; \
+ files=`{ for i in $$list; do echo "$$i"; done; \
+ } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+ dir='$(DESTDIR)$(man3dir)'; $(am__uninstall_files_from_dir)
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+ @list='$(MANS)'; if test -n "$$list"; then \
+ list=`for p in $$list; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \
+ if test -n "$$list" && \
+ grep 'ab help2man is required to generate this page' $$list >/dev/null; then \
+ echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \
+ grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \
+ echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \
+ echo " typically \`make maintainer-clean' will remove them" >&2; \
+ exit 1; \
+ else :; fi; \
+ else :; fi
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(MANS)
+installdirs:
+ for dir in "$(DESTDIR)$(man3dir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-man
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man: install-man3
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-man
+
+uninstall-man: uninstall-man3
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+ distclean distclean-generic distclean-libtool distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-info install-info-am install-man install-man3 \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ uninstall uninstall-am uninstall-man uninstall-man3
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/doc/man/man3/slurm_allocate_resources.3 b/doc/man/man3/slurm_allocate_resources.3
index 886b8be..e646152 100644
--- a/doc/man/man3/slurm_allocate_resources.3
+++ b/doc/man/man3/slurm_allocate_resources.3
@@ -213,6 +213,8 @@
.LP
\fBSLURM_PROTOCOL_VERSION_ERROR\fR Protocol version has changed, re\-link your code.
.LP
+\fBESLURM_CAN_NOT_START_IMMEDIATELY\fR the job can not be started immediately as requested.
+.LP
\fBESLURM_DEFAULT_PARTITION_NOT_SET\fR the system lacks a valid default partition.
.LP
\fBESLURM_JOB_MISSING_PARTITION_KEY\fR use of this partition is restricted through a credential provided only to user root. This job lacks such a valid credential.
diff --git a/doc/man/man5/Makefile.am b/doc/man/man5/Makefile.am
new file mode 100644
index 0000000..4ae881e
--- /dev/null
+++ b/doc/man/man5/Makefile.am
@@ -0,0 +1,34 @@
+htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html
+
+man5_MANS = bluegene.conf.5 \
+ cgroup.conf.5 \
+ cray.conf.5 \
+ gres.conf.5 \
+ slurm.conf.5 \
+ slurmdbd.conf.5 \
+ topology.conf.5 \
+ wiki.conf.5
+
+EXTRA_DIST = $(man5_MANS)
+
+if HAVE_MAN2HTML
+
+html_DATA = \
+ bluegene.conf.html \
+ cgroup.conf.html \
+ gres.conf.html \
+ slurm.conf.html \
+ slurmdbd.conf.html \
+ topology.conf.html \
+ wiki.conf.html
+
+MOSTLYCLEANFILES = ${html_DATA}
+
+EXTRA_DIST += $(html_DATA)
+
+SUFFIXES = .html
+
+.5.html:
+ `dirname $<`/../man2html.py $(srcdir)/../../html/header.txt $(srcdir)/../../html/footer.txt $<
+
+endif
diff --git a/doc/man/man5/Makefile.in b/doc/man/man5/Makefile.in
new file mode 100644
index 0000000..1199d62
--- /dev/null
+++ b/doc/man/man5/Makefile.in
@@ -0,0 +1,611 @@
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@HAVE_MAN2HTML_TRUE@am__append_1 = $(html_DATA)
+subdir = doc/man/man5
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
+ $(top_srcdir)/auxdir/libtool.m4 \
+ $(top_srcdir)/auxdir/ltoptions.m4 \
+ $(top_srcdir)/auxdir/ltsugar.m4 \
+ $(top_srcdir)/auxdir/ltversion.m4 \
+ $(top_srcdir)/auxdir/lt~obsolete.m4 \
+ $(top_srcdir)/auxdir/slurm.m4 \
+ $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \
+ $(top_srcdir)/auxdir/x_ac_affinity.m4 \
+ $(top_srcdir)/auxdir/x_ac_aix.m4 \
+ $(top_srcdir)/auxdir/x_ac_blcr.m4 \
+ $(top_srcdir)/auxdir/x_ac_bluegene.m4 \
+ $(top_srcdir)/auxdir/x_ac_cflags.m4 \
+ $(top_srcdir)/auxdir/x_ac_cray.m4 \
+ $(top_srcdir)/auxdir/x_ac_databases.m4 \
+ $(top_srcdir)/auxdir/x_ac_debug.m4 \
+ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \
+ $(top_srcdir)/auxdir/x_ac_elan.m4 \
+ $(top_srcdir)/auxdir/x_ac_env.m4 \
+ $(top_srcdir)/auxdir/x_ac_federation.m4 \
+ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \
+ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \
+ $(top_srcdir)/auxdir/x_ac_iso.m4 \
+ $(top_srcdir)/auxdir/x_ac_lua.m4 \
+ $(top_srcdir)/auxdir/x_ac_man2html.m4 \
+ $(top_srcdir)/auxdir/x_ac_munge.m4 \
+ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \
+ $(top_srcdir)/auxdir/x_ac_pam.m4 \
+ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \
+ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \
+ $(top_srcdir)/auxdir/x_ac_readline.m4 \
+ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \
+ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
+ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \
+ $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \
+ $(top_srcdir)/auxdir/x_ac_srun.m4 \
+ $(top_srcdir)/auxdir/x_ac_sun_const.m4 \
+ $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+man5dir = $(mandir)/man5
+am__installdirs = "$(DESTDIR)$(man5dir)" "$(DESTDIR)$(htmldir)"
+NROFF = nroff
+MANS = $(man5_MANS)
+DATA = $(html_DATA)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTHD_CFLAGS = @AUTHD_CFLAGS@
+AUTHD_LIBS = @AUTHD_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BGL_LOADED = @BGL_LOADED@
+BGQ_LOADED = @BGQ_LOADED@
+BG_INCLUDES = @BG_INCLUDES@
+BG_LDFLAGS = @BG_LDFLAGS@
+BG_L_P_LOADED = @BG_L_P_LOADED@
+BLCR_CPPFLAGS = @BLCR_CPPFLAGS@
+BLCR_HOME = @BLCR_HOME@
+BLCR_LDFLAGS = @BLCR_LDFLAGS@
+BLCR_LIBS = @BLCR_LIBS@
+BLUEGENE_LOADED = @BLUEGENE_LOADED@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CMD_LDFLAGS = @CMD_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DL_LIBS = @DL_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ELAN_LIBS = @ELAN_LIBS@
+EXEEXT = @EXEEXT@
+FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@
+FGREP = @FGREP@
+GREP = @GREP@
+GTK_CFLAGS = @GTK_CFLAGS@
+GTK_LIBS = @GTK_LIBS@
+HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@
+HAVEPGCONFIG = @HAVEPGCONFIG@
+HAVE_AIX = @HAVE_AIX@
+HAVE_ELAN = @HAVE_ELAN@
+HAVE_FEDERATION = @HAVE_FEDERATION@
+HAVE_MAN2HTML = @HAVE_MAN2HTML@
+HAVE_OPENSSL = @HAVE_OPENSSL@
+HAVE_SOME_CURSES = @HAVE_SOME_CURSES@
+HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@
+HWLOC_LDFLAGS = @HWLOC_LDFLAGS@
+HWLOC_LIBS = @HWLOC_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_LDFLAGS = @LIB_LDFLAGS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
+MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
+MUNGE_LIBS = @MUNGE_LIBS@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_LIBS = @MYSQL_LIBS@
+NCURSES = @NCURSES@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NUMA_LIBS = @NUMA_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PAM_DIR = @PAM_DIR@
+PAM_LIBS = @PAM_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PROCTRACKDIR = @PROCTRACKDIR@
+PROJECT = @PROJECT@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+READLINE_LIBS = @READLINE_LIBS@
+REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
+RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
+SED = @SED@
+SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SLURMCTLD_PORT = @SLURMCTLD_PORT@
+SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@
+SLURMDBD_PORT = @SLURMDBD_PORT@
+SLURMD_PORT = @SLURMD_PORT@
+SLURM_API_AGE = @SLURM_API_AGE@
+SLURM_API_CURRENT = @SLURM_API_CURRENT@
+SLURM_API_MAJOR = @SLURM_API_MAJOR@
+SLURM_API_REVISION = @SLURM_API_REVISION@
+SLURM_API_VERSION = @SLURM_API_VERSION@
+SLURM_MAJOR = @SLURM_MAJOR@
+SLURM_MICRO = @SLURM_MICRO@
+SLURM_MINOR = @SLURM_MINOR@
+SLURM_PREFIX = @SLURM_PREFIX@
+SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@
+SLURM_VERSION_STRING = @SLURM_VERSION_STRING@
+SO_LDFLAGS = @SO_LDFLAGS@
+SSL_CPPFLAGS = @SSL_CPPFLAGS@
+SSL_LDFLAGS = @SSL_LDFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+UTIL_LIBS = @UTIL_LIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+ac_have_man2html = @ac_have_man2html@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lua_CFLAGS = @lua_CFLAGS@
+lua_LIBS = @lua_LIBS@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+man5_MANS = bluegene.conf.5 \
+ cgroup.conf.5 \
+ cray.conf.5 \
+ gres.conf.5 \
+ slurm.conf.5 \
+ slurmdbd.conf.5 \
+ topology.conf.5 \
+ wiki.conf.5
+
+EXTRA_DIST = $(man5_MANS) $(am__append_1)
+@HAVE_MAN2HTML_TRUE@html_DATA = \
+@HAVE_MAN2HTML_TRUE@ bluegene.conf.html \
+@HAVE_MAN2HTML_TRUE@ cgroup.conf.html \
+@HAVE_MAN2HTML_TRUE@ gres.conf.html \
+@HAVE_MAN2HTML_TRUE@ slurm.conf.html \
+@HAVE_MAN2HTML_TRUE@ slurmdbd.conf.html \
+@HAVE_MAN2HTML_TRUE@ topology.conf.html \
+@HAVE_MAN2HTML_TRUE@ wiki.conf.html
+
+@HAVE_MAN2HTML_TRUE@MOSTLYCLEANFILES = ${html_DATA}
+@HAVE_MAN2HTML_TRUE@SUFFIXES = .html
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .html .5
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu doc/man/man5/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu doc/man/man5/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-man5: $(man5_MANS)
+ @$(NORMAL_INSTALL)
+ test -z "$(man5dir)" || $(MKDIR_P) "$(DESTDIR)$(man5dir)"
+ @list='$(man5_MANS)'; test -n "$(man5dir)" || exit 0; \
+ { for i in $$list; do echo "$$i"; done; \
+ } | while read p; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; echo "$$p"; \
+ done | \
+ sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^5][0-9a-z]*$$,5,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+ sed 'N;N;s,\n, ,g' | { \
+ list=; while read file base inst; do \
+ if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+ echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man5dir)/$$inst'"; \
+ $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man5dir)/$$inst" || exit $$?; \
+ fi; \
+ done; \
+ for i in $$list; do echo "$$i"; done | $(am__base_list) | \
+ while read files; do \
+ test -z "$$files" || { \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man5dir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(man5dir)" || exit $$?; }; \
+ done; }
+
+uninstall-man5:
+ @$(NORMAL_UNINSTALL)
+ @list='$(man5_MANS)'; test -n "$(man5dir)" || exit 0; \
+ files=`{ for i in $$list; do echo "$$i"; done; \
+ } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^5][0-9a-z]*$$,5,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+ dir='$(DESTDIR)$(man5dir)'; $(am__uninstall_files_from_dir)
+install-htmlDATA: $(html_DATA)
+ @$(NORMAL_INSTALL)
+ test -z "$(htmldir)" || $(MKDIR_P) "$(DESTDIR)$(htmldir)"
+ @list='$(html_DATA)'; test -n "$(htmldir)" || list=; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \
+ done
+
+uninstall-htmlDATA:
+ @$(NORMAL_UNINSTALL)
+ @list='$(html_DATA)'; test -n "$(htmldir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(htmldir)'; $(am__uninstall_files_from_dir)
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+ @list='$(MANS)'; if test -n "$$list"; then \
+ list=`for p in $$list; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \
+ if test -n "$$list" && \
+ grep 'ab help2man is required to generate this page' $$list >/dev/null; then \
+ echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \
+ grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \
+ echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \
+ echo " typically \`make maintainer-clean' will remove them" >&2; \
+ exit 1; \
+ else :; fi; \
+ else :; fi
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(MANS) $(DATA)
+installdirs:
+ for dir in "$(DESTDIR)$(man5dir)" "$(DESTDIR)$(htmldir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+ -test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-htmlDATA install-man
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man: install-man5
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-htmlDATA uninstall-man
+
+uninstall-man: uninstall-man5
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+ distclean distclean-generic distclean-libtool distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-htmlDATA install-info install-info-am install-man \
+ install-man5 install-pdf install-pdf-am install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+ ps ps-am uninstall uninstall-am uninstall-htmlDATA \
+ uninstall-man uninstall-man5
+
+
+@HAVE_MAN2HTML_TRUE@.5.html:
+@HAVE_MAN2HTML_TRUE@ `dirname $<`/../man2html.py $(srcdir)/../../html/header.txt $(srcdir)/../../html/footer.txt $<
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/doc/man/man5/bluegene.conf.5 b/doc/man/man5/bluegene.conf.5
index 5b36c87..87289d8 100644
--- a/doc/man/man5/bluegene.conf.5
+++ b/doc/man/man5/bluegene.conf.5
@@ -93,6 +93,27 @@
IoloadImage used for creation of all bgblocks.
There is no default value and this must be specified.
+.SH "The BlueGene/Q specific options are:"
+.TP
+\fBAllowSubBlockAllocations\fR
+Can be set to Yes or No, defaults to No. This option allows multiple users to
+run jobs as small as 1 cnode in size on a block one midplane in size and
+smaller. While this option gives great flexibility to run a host of job
+sizes previously not available on any BlueGene system it also may cause
+security concerns since IO traffic can share the same path with other jobs.
+
+NOTE - There is a current limitation for sub-block jobs and how the system
+(used for I/O) and user (used for MPI) torus class routes are configured. The
+network device hardware has cutoff registers to prevent packets from flowing
+outside of the sub-block. Unfortunately, when the sub-block has a size 3,
+the job can attempt to send user packets outside of its sub-block. This causes
+it to be terminated by signal 36. To prevent this from happening SLURM does
+not allow a sub-block to be used with any dimension of 3.
+
+NOTE - The current IBM API does not allow wrapping inside a midplane.
+Meaning you can not create a sub-block of 2 with nodes in the 0 and 3 position.
+SLURM will support this in the future when the underlying system allows it.
+
.SH "All options below are common on all BlueGene systems:"
.TP
\fBAltMloaderImage\fR
@@ -103,11 +124,6 @@
groups. You can put as many alternative images as you want in the conf file.
.TP
-\fBBasePartitionNodeCount\fR
-The number of c\-nodes (compute nodes) per base partition.
-There is no default value and this must be specified (usually 512).
-
-.TP
\fBBridgeAPILogFile\fR
Fully qualified pathname of a into which the Bridge API logs are
to be written.
@@ -131,6 +147,15 @@
.RE
.TP
+\fBDefaultConnType\fR
+Specify the default Connection Type(s) to be used when generating new blocks
+in Dynamic LayoutMode. The default value is TORUS. On a BGQ system you can
+specify a different connection type for each dimension. (i.e. T,T,T,M would
+make the default be torus in all dimensions except Z where it would be mesh)
+NOTE - If a block is requested that can use all the midplanes in a dimension
+torus will always be used.
+
+.TP
\fBDenyPassthrough\fR
Specify which dimensions you do not want to allow pass\-throughs.
Valid options are A, X, Y, Z or all ("A" applies only to BlueGene/Q systems).
@@ -139,6 +164,18 @@
By default, pass\-throughs are enabled in every dimension.
.TP
+\fBIONodesPerMP\fR
+The number of IO nodes on a midplane. This number must be the smallest
+number if you have a heterogeneous system.
+There is no default value and this must be specified. The typical settings
+for BlueGene/L systems are as follows: For IO rich systems, 64 is the value that
+should be used to create small blocks. For systems that are not IO rich, or
+for which small blocks are not desirable, 8 is usually the number to use.
+For BlueGene/P IO rich systems, 32 is the value that should be used to create
+small blocks since there are only 2 IO nodes per nodecard instead of 4 as on
+BlueGene/L.
+
+.TP
\fBLayoutMode\fR
Describes how SLURM should create bgblocks.
.RS
@@ -149,43 +186,50 @@
\fBOVERLAP\fR:
Create and use the defined bgblocks, which may overlap.
It is highly recommended that none of the bgblocks have any passthroughs
-in the X\-dimension.
+in the X\-dimension on BGL and BGP systems.
\fBUse this mode with extreme caution.\fR
.TP
\fBDYNAMIC\fR:
Create and use bgblocks as needed for each job.
Bgblocks will not be defined in the bluegene.conf file.
-Dynamic partitioning may introduce fragmentation of resources
-and starvation of larger jobs.
-\fBUse this mode with caution.\fR
+Dynamic partitioning may introduce fragmentation of resources.
+\fBUse this mode with mild caution.\fR
.RE
.TP
+\fBMaxBlockInError\fR
+MaxBlockInError is used on BGQ systems to specify the percentage of a block
+allowed in an error state before no future jobs are allowed. Since cnodes can
+go into Software Failure and allow the block to not fail this option is used
+when allowing multiple jobs to run on a block and once the percentage of cnodes
+in that block breach this limit no future jobs will be allowed to be run on
+the block. After all jobs are finished on the block the block is freed which
+will resolve any cnodes in an error state. Default is 0, which means once
+any cnodes are in an error state disallow future jobs.
+
+.TP
+\fBMidplaneNodeCnt\fR
+The number of c\-nodes (compute nodes) per midplane.
+There is no default value and this must be specified (usually 512).
+
+.TP
\fBMloaderImage\fR
MloaderImage used for creation of all bgblocks.
There is no default value and this must be specified.
.TP
-\fBNodeCardNodeCount\fR
-Number of c\-nodes per node card.
-There is no default value and this must be specified. For BlueGene/L systems
+\fBNodeCardNodeCnt\fR or \fBNodeBoardNodeCnt\fR
+Number of c\-nodes per nodecard / nodeboard.
+There is no default value and this must be specified. For most BlueGene systems
this is usually 32.
.TP
-\fBNumPsets\fR
-The Numpsets used for creation of all bgblocks. This value really means the
-number of IO nodes on a base partition. This number must be the smallest
-number if you have a heterogeneous system.
-There is no default value and this must be specified. The typical settings
-for BlueGene/L systems are as follows: For IO rich systems, 64 is the value that
-should be used to create small blocks. For systems that are not IO rich, or
-for which small blocks are not desirable, 8 is usually the number to use.
-For BlueGene/P IO rich systems, 32 is the value that should be used to create
-small blocks since there are only 2 IO nodes per nodecard instead of 4 as on
-BlueGene/L.
+\fBSubMidplaneSystem\fR
+Set to Yes if this system is not a full midplane in size. Default is No
+(regular system).
.LP
-Each bgblock is defined by the base partitions used to construct it.
+Each bgblock is defined by the midplanes used to construct it.
Ordering is very important for laying out switch wires. Please use the smap
tool to define blocks and do not change the order of blocks created.
A bgblock is implicitly created containing all resources on the system.
@@ -197,7 +241,7 @@
will be based upon the NodeName defined in slurm.conf
.TP
-\fBBPs\fR
+\fBMPs\fR
Define the coordinates of the bgblock end points.
For BlueGene/L and BlueGene/P systems there will be three coordinates (X, Y, and Z).
For BlueGene/Q systems there will be for coordinates (A, X, Y, and Z).
@@ -205,26 +249,32 @@
.TP
\fBType\fR
Define the network connection type for the bgblock.
-The default value is TORUS.
+The default value is TORUS. On a BGQ system you can
+specify a different connection type for each dimension. (i.e. T,T,T,M would
+make the default be torus in all dimensions except Z where it would be mesh)
+NOTE - If a block is requested that can use all the midplanes in a dimension
+torus will always be used.
+
.RS
.TP 8
\fBMESH\fR:
Communication occur over a mesh.
.TP
\fBSMALL\fR:
-The base partition is divided into more than one bgblock.
-The administrator should define the number of single node cards and
-quarter base partition blocks using the options \fB32CNBlocks\fR and
-\fB128CNBlocks\fR respectively for a BlueGene/L system. \fB16CNBlocks\fR,
-\fB64CNBlocks\fR, and \fB256CNBlocks\fR are also available for
-BlueGene/P systems. Keep in mind you
-must have enough IO nodes to make all these configurations possible.
+The midplane is divided into more than one bgblock.
+The administrator should define the number of single nodecards and
+quarter midplane blocks using the options \fB32CNBlocks\fR and
+\fB128CNBlocks\fR respectively for a BlueGene/L system. \fB64CNBlocks\fR,
+and \fB256CNBlocks\fR are also available for later BlueGene systems.
+\fB16CNBlocks\fR is also valid on BlueGene/P systems. Keep in mind you
+must have at least one IO node per block. So if you only have 4 ionodes per
+midplane the smallest block you will be able to make is 128 c-nodes.
-The total number of c\-nodes in defined blocks must not exceed
-\fBBasePartitionNodeCnt\fR.
-If not specified, the base partition will be divided into four
-blocks.
+The total number of c\-nodes of the blocks in a small request must not exceed
+\fBMidplaneNodeCnt\fR.
+If none are specified, the midplane will be divided into four 128 c-node blocks.
See example below.
+
.TP
\fBTORUS\fR:
Communications occur over a torus (end\-points of network directly connect.
@@ -235,7 +285,7 @@
.br
##################################################################
.br
-# bluegene.conf
+# bluegene.conf for a Bluegene/L system
.br
# build by smap on 03/06/2006
.br
@@ -253,11 +303,11 @@
.br
RamDiskImage=/bgl/BlueLight/ppcfloor/bglsys/bin/ramdisk.elf
.br
-BasePartitionNodeCnt=512
+MidplaneNodeCnt=512
.br
NodeCardNodeCnt=32
.br
-NumPsets=64 # An I/O rich environment
+IONodesPerMP=64 # An I/O rich environment
.br
LayoutMode=STATIC
.br
@@ -281,7 +331,7 @@
.br
BPs=[332] Type=TORUS # 1x1x1 = 1
.br
-BPs=[333] Type=SMALL 32CNBlocks=4 128CNBlocks=3 # 1/16 * 4 + 1/4 * 3
+BPs=[333] Type=SMALL 32CNBlocks=4 128CNBlocks=3 # 32 * 4 + 128 * 3 = 512
.SH "COPYING"
Copyright (C) 2006-2010 The Regents of the University of California.
diff --git a/doc/man/man5/cgroup.conf.5 b/doc/man/man5/cgroup.conf.5
index eaf47b2..69c48bb 100644
--- a/doc/man/man5/cgroup.conf.5
+++ b/doc/man/man5/cgroup.conf.5
@@ -1,4 +1,4 @@
-.TH "cgroup.conf" "5" "December 2010" "cgroup.conf 2.2" \
+.TH "cgroup.conf" "5" "June 2012" "cgroup.conf 2.2" \
"Slurm cgroup configuration file"
.SH "NAME"
@@ -6,7 +6,7 @@
.SH "DESCRIPTION"
-\fBcgroup.conf\fP is an ASCII file which defines parameters used by
+\fBcgroup.conf\fP is an ASCII file which defines parameters used by
Slurm's Linux cgroup related plugins.
The file location can be modified at system build time using the
DEFAULT_SLURM_CONF parameter or at execution time by setting the SLURM_CONF
@@ -20,9 +20,10 @@
Changes to the configuration file take effect upon restart of
SLURM daemons, daemon receipt of the SIGHUP signal, or execution
of the command "scontrol reconfigure" unless otherwise noted.
+
.LP
-Two cgroup plugins are currently available in SLURM. The first
-one is a proctrack plugin, the second one a task plugin.
+For general Slurm Cgroups information, see the Cgroups Guide at
+<http://www.schedmd.com/slurmdocs/cgroups.html>.
.LP
The following cgroup.conf parameters are defined to control the general behavior
@@ -38,79 +39,33 @@
\fBCgroupAutomount\fR=<yes|no>
Slurm cgroup plugins require valid and functional cgroup subsystem to be mounted
under /cgroup/<subsystem_name>.
-When launched, plugins check their subsystem availability. If not available,
-the plugin launch fails unless CgroupAutomount is set to yes. In that case, the
+When launched, plugins check their subsystem availability. If not available,
+the plugin launch fails unless CgroupAutomount is set to yes. In that case, the
plugin will first try to mount the required subsystems.
.TP
\fBCgroupReleaseAgentDir\fR=<path_to_release_agent_directory>
-Used to tune the cgroup system behavior. This parameter identifies the location
-of the directory containing Slurm cgroup release_agent files. A release_agent file
-is required for each mounted subsystem. The release_agent file name must have the
-following format: release_<subsystem_name>. For instance, the release_agent file
-for the cpuset subsystem must be named release_cpuset. See also CLEANUP OF
-CGROUPS below.
-
-.SH "PROCTRACK/CGROUP PLUGIN"
-
-Slurm \fBproctrack/cgroup\fP plugin is used to track processes using the
-freezer control group subsystem. It creates a hierarchical set of
-directories for each step, putting the step tasks into the leaf.
-.LP
-This directory structure is like the following:
-.br
-/cgroup/freezer/uid_%uid/job_%jobid/step_%stepid
-.LP
-Slurm cgroup proctrack plugin is enabled with the following parameter
-in slurm.conf:
-.br
-ProctrackType=proctrack/cgroup
-
-.LP
-No particular cgroup.conf parameter is defined to control the behavior
-of this particular plugin.
-
+Used to tune the cgroup system behavior. This parameter identifies the location
+of the directory containing Slurm cgroup release_agent files.
.SH "TASK/CGROUP PLUGIN"
.LP
-Slurm \fBtask/cgroup\fP plugin is used to enforce allocated resources
-constraints, thus avoiding tasks to use unallocated resources. It currently
-only uses cpuset subsystem but could use memory and devices subsystems in a
-near future too.
-
-.LP
-It creates a hierarchical set of directories for each task and subsystem.
-The directory structure is like the following:
-.br
-/cgroup/%subsys/uid_%uid/job_%jobid/step_%stepid/task_%taskid
-
-.LP
-Slurm cgroup task plugin is enabled with the following parameter
-in slurm.conf:
-.br
-TaskPlugin=task/cgroup
-
-.LP
The following cgroup.conf parameters are defined to control the behavior
of this particular plugin:
.TP
\fBConstrainCores\fR=<yes|no>
-If configured to "yes" then constrain allowed cores to the subset of
+If configured to "yes" then constrain allowed cores to the subset of
allocated resources. It uses the cpuset subsystem.
The default value is "no".
+
.TP
\fBTaskAffinity\fR=<yes|no>
-If configured to "yes" then set a default task affinity to bind each step
+If configured to "yes" then set a default task affinity to bind each step
task to a subset of the allocated cores using \fBsched_setaffinity\fP.
The default value is "no".
-.LP
-The following cgroup.conf parameters could be defined to control the behavior
-of this particular plugin in a next version where memory and devices support
-would be added :
-
.TP
\fBAllowedRAMSpace\fR=<number>
Constrain the job cgroup RAM to this percentage of the allocated memory.
@@ -138,7 +93,13 @@
.TP
\fBConstrainSwapSpace\fR=<yes|no>
If configured to "yes" then constrain the job's swap space usage.
-The default value is "no".
+The default value is "no". Note that when set to "yes" and
+ConstrainRAMSpace is set to "no", AllowedRAMSpace is automatically set
+to 100% in order to limit the RAM+Swap amount to 100% of job's requirement
+plus the percent of allowed swap space. This amount is thus set to both
+RAM and RAM+Swap limits. This means that in that particular case,
+ConstrainRAMSpace is automatically enabled with the same limit than the one
+used to constrain swap space.
Also see \fBAllowedSwapSpace\fR.
.TP
@@ -171,13 +132,13 @@
.TP
\fBAllowedDevicesFile\fR=<path_to_allowed_devices_file>
-If the ConstrainDevices field is set to "yes" then this file has to be used to declare
-the devices that need to be allowed by default for all the jobs. The current implementation
+If the ConstrainDevices field is set to "yes" then this file has to be used to declare
+the devices that need to be allowed by default for all the jobs. The current implementation
of cgroup devices subsystem works as a whitelist of entries, which means that in order to
isolate the access of a job upon particular devices we need to allow the access on all
-the devices, supported by default and then deny on those that the job does not have the
-permission to use. The default value is "/etc/slurm/cgroup_allowed_devices_file.conf". The syntax of
-the file accepts one device per line and it permits lines like /dev/sda* or /dev/cpu/*/*.
+the devices, supported by default and then deny on those that the job does not have the
+permission to use. The default value is "/etc/slurm/cgroup_allowed_devices_file.conf". The syntax of
+the file accepts one device per line and it permits lines like /dev/sda* or /dev/cpu/*/*.
See also an example of this file in etc/allowed_devices_file.conf.example.
@@ -198,72 +159,8 @@
.br
#
-.SH "NOTES"
-.LP
-Only one instance of a cgroup subsystem is valid at a time in the kernel.
-If you try to mount another cgroup hierarchy that uses the same cpuset
-subsystem it will fail.
-However you can mount another cgroup hierarchy for a different cpuset
-subsystem.
-
-.SH CLEANUP OF CGROUPS
-.LP
-To allow cgroups to be removed automatically when they are no longer in use
-the notify_on_release flag is set in each cgroup when the cgroup is
-instantiated. The release_agent file for each subsystem is set up when the
-subsystem is mounted. The name of each release_agent file is
-release_<subsystem name>. The directory is specified via the
-CgroupReleaseAgentDir parameter in cgroup.conf. A simple release agent
-mechanism to remove slurm cgroups when they become empty may be set up by
-creating the release agent files for each required subsystem as symbolic
-links to a common release agent script, as shown in the example below:
-
-[sulu] (slurm) etc> cat cgroup.conf | grep CgroupReleaseAgentDir
-.br
-CgroupReleaseAgentDir="/etc/slurm/cgroup"
-.br
-
-[sulu] (slurm) etc> ls \-al /etc/slurm/cgroup
-.br
-total 12
-.br
-drwxr-xr-x 2 root root 4096 2010-04-23 14:55 .
-.br
-drwxr-xr-x 4 root root 4096 2010-07-22 14:48 ..
-.br
-\-rwxrwxrwx 1 root root 234 2010-04-23 14:52 release_common
-.br
-lrwxrwxrwx 1 root root 32 2010-04-23 11:04 release_cpuset -> /etc/slurm/cgroup/release_common
-.br
-lrwxrwxrwx 1 root root 32 2010-04-23 11:03 release_freezer -> /etc/slurm/cgroup/release_common
-
-[sulu] (slurm) etc> cat /etc/slurm/cgroup/release_common
-.br
-#!/bin/bash
-.br
-base_path=/cgroup
-.br
-progname=$(basename $0)
-.br
-subsystem=${progname##*_}
-.br
-.br
-rmcg=${base_path}/${subsystem}$@
-.br
-uidcg=${rmcg%/job*}
-.br
-if [[ \-d ${base_path}/${subsystem} ]]
-.br
-then
-.br
- flock \-x ${uidcg} \-c "rmdir ${rmcg}"
-.br
-fi
-.br
-[sulu] (slurm) etc>
-
.SH "COPYING"
-Copyright (C) 2010 Lawrence Livermore National Security.
+Copyright (C) 2010-2012 Lawrence Livermore National Security.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
CODE\-OCEC\-09\-009. All rights reserved.
.LP
diff --git a/doc/man/man5/gres.conf.5 b/doc/man/man5/gres.conf.5
index 455a03d..09413dc 100644
--- a/doc/man/man5/gres.conf.5
+++ b/doc/man/man5/gres.conf.5
@@ -1,4 +1,4 @@
-.TH "gres.conf" "5" "April 2012" "gres.conf 2.3" "Slurm configuration file"
+.TH "gres.conf" "5" "July 2012" "gres.conf 2.3" "Slurm configuration file"
.SH "NAME"
gres.conf \- Slurm configuration file for generic resource management.
@@ -28,6 +28,8 @@
The default value is set to the number of \fBFile\fR values specified (if any),
otherwise the default value is one. A suffix of "K", "M" or "G" may be used
to mulitply the number by 1024, 1048576 or 1073741824 respectively.
+Note that Count is a 32\-bit field and the maximum value is 4,294,967,295.
+
.TP
\fBCPUs\fR
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index d895fb6..51806aa 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1,7 +1,8 @@
-.TH "slurm.conf" "5" "September 2011" "slurm.conf 2.3" "Slurm configuration file"
+.TH "slurm.conf" "5" "April 2012" "slurm.conf 2.4" "Slurm configuration file"
.SH "NAME"
slurm.conf \- Slurm configuration file
+
.SH "DESCRIPTION"
\fBslurm.conf\fP is an ASCII file which describes general SLURM
configuration information, the nodes to be managed, information about
@@ -116,7 +117,10 @@
will be written to the SLURM DBD, which manages an underlying MySQL or
PostgreSQL database. See "man slurmdbd" for more information. The
default value is "accounting_storage/none" and indicates that account
-records are not maintained. Note: the PostgreSQL plugin is not
+records are not maintained.
+Note: The filetxt plugin records only a limited subset of accounting
+information and will prevent some sacct options from proper operation.
+Note: the PostgreSQL plugin is not
complete and should not be used if wanting to use associations. It
will however work with basic accounting of jobs and job steps. If
interested in completing, please email slurm-dev@lists.llnl.gov. Also
@@ -209,6 +213,7 @@
NOTE: If a file is found at sbin/scch (relative to the SLURM installation
location), it will be executed upon completion of the checkpoint. This can
be a script used for managing the checkpoint files.
+NOTE: SLURM's BLCR logic only supports batch jobs.
.TP
\fBcheckpoint/none\fR
no checkpoint support (default)
@@ -221,7 +226,11 @@
\fBClusterName\fR
The name by which this SLURM managed cluster is known in the
accounting database. This is needed distinguish accounting records
-when multiple clusters report to the same database.
+when multiple clusters report to the same database. Because of limitations
+in some databases, any upper case letters in the name will be silently mapped
+to lower case. In order to avoid confusion, it is recommended that the name
+be lower case.
+
.TP
\fBCompleteWait\fR
@@ -422,8 +431,10 @@
.TP
\fBEpilog\fR
Fully qualified pathname of a script to execute as user root on every
-node when a user's job completes (e.g. "/usr/local/slurm/epilog"). This may
-be used to purge files, disable user login, etc.
+node when a user's job completes (e.g. "/usr/local/slurm/epilog"). A
+glob pattern (See \fBglob\fR(7)) may also be used to run more than
+one epilog script (e.g. "/etc/slurm/epilog.d/*"). The Epilog script
+or scripts may be used to purge files, disable user login, etc.
By default there is no epilog.
See \fBProlog and Epilog Scripts\fR for more information.
@@ -572,11 +583,18 @@
\fBJobAcctGatherType\fR
The job accounting mechanism type.
Acceptable values at present include "jobacct_gather/aix" (for AIX operating
-system), "jobacct_gather/linux" (for Linux operating system) and "jobacct_gather/none"
+system), "jobacct_gather/linux" (for Linux operating system),
+"jobacct_gather/cgroup (experimental) and "jobacct_gather/none"
(no accounting data collected).
The default value is "jobacct_gather/none".
-In order to use the \fBsstat\fR tool, "jobacct_gather/aix" or "jobacct_gather/linux"
-must be configured.
+"jobacct_gather/cgroup" is an experimental plugin for the Linux operating system
+that uses cgroups to collect accounting statistics. The plugin collects the
+following statistics: From the cgroup memory subsystem: memory.usage_in_bytes
+(reported as 'pages') and rss from memory.stat (reported as 'rss'). From the
+cgroup cpuacct subsystem: user cpu time and system cpu time. No value
+is provided by cgroups for virtual memory size ('vsize').
+In order to use the \fBsstat\fR tool, "jobacct_gather/aix", "jobacct_gather/linux",
+or "jobacct_gather/cgroup" must be configured.
.TP
\fBJobAcctGatherFrequency\fR
@@ -820,6 +838,7 @@
\fBmvapich\fR,
\fBnone\fR (default, which works for many other versions of MPI) and
\fBopenmpi\fR.
+\fBpmi2\fR,
More information about MPI use is available here
<http://www.schedmd.com/slurmdocs/mpi_guide.html>.
@@ -951,6 +970,17 @@
Supported values are "YES" and "NO". The default value is "NO".
.TP
+\fBPriorityFlags\fR
+Flags to modify priority behavior.
+Applicable only if PriorityType=priority/multifactor.
+.RS
+.TP 17
+\fBACCRUE_ALWAYS\fR
+If set, priority age factor will be increased despite job dependencies
+or holds.
+.RE
+
+.TP
\fBPriorityMaxAge\fR
Specifies the job age which will be given the maximum age factor in computing
priority. For example, a value of 30 minutes would result in all jobs over
@@ -1123,11 +1153,13 @@
\fBProlog\fR
Fully qualified pathname of a program for the slurmd to execute
whenever it is asked to run a job step from a new job allocation (e.g.
-"/usr/local/slurm/prolog"). The slurmd executes the script before starting
-the first job step. This may be used to purge files, enable user login, etc.
-By default there is no prolog. Any configured script is expected to
-complete execution quickly (in less time than \fBMessageTimeout\fR).
-See \fBProlog and Epilog Scripts\fR for more information.
+"/usr/local/slurm/prolog"). A glob pattern (See \fBglob\fR(7)) may
+also be used to specify more than one program to run (e.g.
+"/etc/slurm/prolog.d/*"). The slurmd executes the prolog before starting
+the first job step. The prolog script or scripts may be used to purge files,
+enable user login, etc. By default there is no prolog. Any configured script
+is expected to complete execution quickly (in less time than
+\fBMessageTimeout\fR). See \fBProlog and Epilog Scripts\fR for more information.
.TP
\fBPrologSlurmctld\fR
@@ -1141,6 +1173,12 @@
While this program is running, the nodes associated with the job will be
have a POWER_UP/CONFIGURING flag set in their state, which can be readily
viewed.
+The slurmctld daemon will wait indefinitely for this program to complete.
+Once the program completes with an exit code of zero, the nodes will be
+considered ready for use and the job will be started.
+If some node can not be made available for use, the program should drain
+the node (typically using the scontrol command) and terminate with a non\-zero
+exit code.
A non\-zero exit code will result in the job being requeued (where possible)
or killed.
See \fBProlog and Epilog Scripts\fR for more information.
@@ -1236,6 +1274,27 @@
See \fBPropagateResourceLimits\fR above for a list of valid limit names.
.TP
+\fBRebootProgram\fR
+Program to be executed on each compute node to reboot it. Invoked on each node
+once it becomes idle after the command "scontrol reboot_nodes" is executed by
+an authorized user. After being rebooting, the node is returned to normal use.
+
+.TP
+\fBReconfigFlags\fR
+Flags to control various actions that may be taken when an "scontrol
+reconfig" command is issued. Currently the only option defined is:
+.RS
+.TP 17
+\fBKeepPartInfo\fR
+If set, an "scontrol reconfig" command will maintain the in\-memory
+state of partitions that may have been dynamically updated by
+"scontrol update". Partition information in the slurm.conf file will
+be merged with in\-memory data. The default is not set, and the
+"scontrol reconfig" will rebuild the partition information using only
+the definitions in the slurm.conf file.
+.RE
+
+.TP
\fBResumeProgram\fR
SLURM supports a mechanism to reduce power consumption on nodes that
remain idle for an extended period of time.
@@ -1369,6 +1428,15 @@
The default value is 30 seconds.
This option applies only to \fBSchedulerType=sched/backfill\fR.
.TP
+\fBbf_max_job_user=#\fR
+The maximum number of jobs per user to attempt backfill scheduling for,
+not counting jobs which cannot be started due to an association resource
+limit. One can set this limit to prevent users from flooding the backfill
+queue with jobs that cannot start and that prevent jobs from other users
+from starting. This is similar to the MAXIJOB limit in Maui.
+The default value is 0, which means no limit.
+This option applies only to \fBSchedulerType=sched/backfill\fR.
+.TP
\fBbf_resolution=#\fR
The number of seconds in the resolution of data maintained about when jobs
begin and end.
@@ -1401,7 +1469,7 @@
.TP
\fBmax_switch_wait=#\fR
Maximum number of seconds that a job can delay execution waiting for the
-specified desired switch count. The default value is 60 seconds.
+specified desired switch count. The default value is 300 seconds.
.RE
.TP
@@ -1510,21 +1578,20 @@
.TP
\fBCR_CPU\fR
CPUs are consumable resources.
-There is no notion of sockets, cores or threads;
-do not define those values in the node specification. If these
-are defined, unexpected results will happen when hyper\-threading
-is enabled CPUs= should be used instead.
-On a multi\-core system, each core will be considered a CPU.
-On a multi\-core and hyper\-threaded system, each thread will be
-considered a CPU.
-On single\-core systems, each CPUs will be considered a CPU.
+Configure the number of \fBCPUs\fR on each node, which may be equal to the
+count of cores or hyper\-threads on the node depending upon the desired minimum
+resource allocation. The node's \fBSockets\fR, \fBCoresPerSocket\fR and
+\fBThreadsPerCore\fR may also be configured if desired and may result in job
+allocations which have improved locality.
+
.TP
\fBCR_CPU_Memory\fR
CPUs and memory are consumable resources.
-There is no notion of sockets, cores or threads;
-do not define those values in the node specification. If these
-are defined, unexpected results will happen when hyper\-threading
-is enabled CPUs= should be used instead.
+Configure the number of \fBCPUs\fR on each node, which may be equal to the
+count of cores or hyper\-threads on the node depending upon the desired minimum
+resource allocation. The node's \fBSockets\fR, \fBCoresPerSocket\fR and
+\fBThreadsPerCore\fR may also be configured if desired and may result in job
+allocations which have improved locality.
Setting a value for \fBDefMemPerCPU\fR is strongly recommended.
.TP
\fBCR_Core\fR
@@ -1532,12 +1599,16 @@
On nodes with hyper\-threads, each thread is counted as a CPU to
satisfy a job's resource requirement, but multiple jobs are not
allocated threads on the same core.
+The count of CPUs allocated to a job may be rounded up to account for every
+CPU on an allocated core.
.TP
\fBCR_Core_Memory\fR
Cores and memory are consumable resources.
On nodes with hyper\-threads, each thread is counted as a CPU to
satisfy a job's resource requirement, but multiple jobs are not
allocated threads on the same core.
+The count of CPUs allocated to a job may be rounded up to account for every
+CPU on an allocated core.
Setting a value for \fBDefMemPerCPU\fR is strongly recommended.
.TP
\fBCR_ONE_TASK_PER_CORE\fR
@@ -1556,16 +1627,16 @@
On nodes with multiple cores, each core or thread is counted as a CPU
to satisfy a job's resource requirement, but multiple jobs are not
allocated resources on the same socket.
-Note that jobs requesting one CPU will only be allocated
-that one CPU, but no other job will share the socket.
+The count of CPUs allocated to a job may be rounded up to account for every
+CPU on an allocated socket.
.TP
\fBCR_Socket_Memory\fR
Memory and sockets are consumable resources.
On nodes with multiple cores, each core or thread is counted as a CPU
to satisfy a job's resource requirement, but multiple jobs are not
allocated resources on the same socket.
-Note that jobs requesting one CPU will only be allocated
-that one CPU, but no other job will share the socket.
+The count of CPUs allocated to a job may be rounded up to account for every
+CPU on an allocated socket.
Setting a value for \fBDefMemPerCPU\fR is strongly recommended.
.TP
\fBCR_Memory\fR
@@ -1592,9 +1663,39 @@
.TP
\fBSlurmctldDebug\fR
The level of detail to provide \fBslurmctld\fR daemon's logs.
-Values from 0 to 9 are legal, with `0' being "quiet" operation and `9'
-being insanely verbose.
-The default value is 3.
+The default value is \fBinfo\fR.
+.RS
+.TP 10
+\fBquiet\fR
+Log nothing
+.TP
+\fBfatal\fR
+Log only fatal errors
+.TP
+\fBerror\fR
+Log only errors
+.TP
+\fBinfo\fR
+Log errors and general informational messages
+.TP
+\fBverbose\fR
+Log errors and verbose informational messages
+.TP
+\fBdebug\fR
+Log errors and verbose informational messages and debugging messages
+.TP
+\fBdebug2\fR
+Log errors and verbose informational messages and more debugging messages
+.TP
+\fBdebug3\fR
+Log errors and verbose informational messages and even more debugging messages
+.TP
+\fBdebug4\fR
+Log errors and verbose informational messages and even more debugging messages
+.TP
+\fBdebug5\fR
+Log errors and verbose informational messages and even more debugging messages
+.RE
.TP
\fBSlurmctldLogFile\fR
@@ -1631,9 +1732,39 @@
.TP
\fBSlurmdDebug\fR
The level of detail to provide \fBslurmd\fR daemon's logs.
-Values from 0 to 9 are legal, with `0' being "quiet" operation and `9' being
-insanely verbose.
-The default value is 3.
+The default value is \fBinfo\fR.
+.RS
+.TP 10
+\fBquiet\fR
+Log nothing
+.TP
+\fBfatal\fR
+Log only fatal errors
+.TP
+\fBerror\fR
+Log only errors
+.TP
+\fBinfo\fR
+Log errors and general informational messages
+.TP
+\fBverbose\fR
+Log errors and verbose informational messages
+.TP
+\fBdebug\fR
+Log errors and verbose informational messages and debugging messages
+.TP
+\fBdebug2\fR
+Log errors and verbose informational messages and more debugging messages
+.TP
+\fBdebug3\fR
+Log errors and verbose informational messages and even more debugging messages
+.TP
+\fBdebug4\fR
+Log errors and verbose informational messages and even more debugging messages
+.TP
+\fBdebug5\fR
+Log errors and verbose informational messages and even more debugging messages
+.RE
.TP
\fBSlurmdLogFile\fR
@@ -1642,6 +1773,8 @@
The default value is none (performs logging via syslog).
Any "%h" within the name is replaced with the hostname on which the
\fBslurmd\fR is running.
+Any "%n" within the name is replaced with the SLURM node name on which the
+\fBslurmd\fR is running.
.br
See the section \fBLOGGING\fR if a pathname is specified.
@@ -1649,6 +1782,10 @@
\fBSlurmdPidFile\fR
Fully qualified pathname of a file into which the \fBslurmd\fR daemon may write
its process id. This may be used for automated signal processing.
+Any "%h" within the name is replaced with the hostname on which the
+\fBslurmd\fR is running.
+Any "%n" within the name is replaced with the SLURM node name on which the
+\fBslurmd\fR is running.
The default value is "/var/run/slurmd.pid".
.TP
@@ -1738,7 +1875,7 @@
readable and writable by both systems.
Since all running and pending job information is stored here, the use of
a reliable file system (e.g. RAID) is recommended.
-The default value is "/tmp".
+The default value is "/var/spool".
If any slurm daemons terminate abnormally, their core files will also be written
into this directory.
@@ -2007,7 +2144,7 @@
Optimal system performance can typically be achieved if \fBTreeWidth\fR
is set to the square root of the number of nodes in the cluster for
systems having no more than 2500 nodes or the cube root for larger
-systems.
+systems. The value may not exceed 65533.
.TP
\fBUnkillableStepProgram\fR
@@ -2072,7 +2209,7 @@
Specifies how many seconds the srun command should by default wait after
the first task terminates before terminating all remaining tasks. The
"\-\-wait" option on the srun command line overrides this value.
-If set to 0, this feature is disabled.
+The default value is 0, which disables this feature.
May not exceed 65533 seconds.
.LP
@@ -2107,6 +2244,8 @@
in the configuration file with multiple entries where "NodeName=DEFAULT".
The "NodeName=" specification must be placed on every line
describing the configuration of nodes.
+A single node name can not appear as a NodeName value in more than one line
+(duplicate node name records will be ignored).
In fact, it is generally possible and desirable to define the
configurations of all nodes in only a few lines.
This convention permits significant optimization in the scheduling
@@ -2199,7 +2338,7 @@
(e.g. "NodeName=lx[0\-7] NodeAddr="elx[0\-7]").
\fBNodeAddr\fR may also contain IP addresses.
By default, the \fBNodeAddr\fR will be identical in value to
-\fBNodeName\fR.
+\fBNodeHostname\fR.
.TP
\fBCoresPerSocket\fR
@@ -2236,7 +2375,8 @@
(e.g. "Gres=bandwidth:10000,gpu:2").
A suffix of "K", "M" or "G" may be used to mulitply the number by 1024,
1048576 or 1073741824 respectively (e.g. "Gres=bandwidth:4G,gpu:4")..
-By default a node has no generic resources.
+By default a node has no generic resources and its maximum count is
+4,294,967,295.
Also see \fBFeature\fR.
.TP
@@ -2365,6 +2505,12 @@
"FAILING, "IDLE", and "UNKNOWN".
.RS
.TP 10
+\fBCLOUD\fP
+Indicates the node exists in the cloud.
+Its initial state will be treated as powered down.
+The node will be available for use after its state is recovered from SLURM's
+state save file or the slurmd daemon starts on the compute node.
+.TP
\fBDOWN\fP
Indicates the node failed and is unavailable to be allocated work.
.TP
@@ -2387,7 +2533,7 @@
for use simply by updating the node state using the scontrol command rather
than restarting the slurmctld daemon. After these nodes are made available,
change their \fRState\fR in the slurm.conf file. Until these nodes are made
-available, they will not be seen using any SLURM commands or Is nor will
+available, they will not be seen using any SLURM commands nor will
any attempt be made to contact them.
.TP
\fBUNKNOWN\fP
@@ -2472,6 +2618,10 @@
The default value is "UNKNOWN".
Also see the \fBDownNodes\fR parameter below.
+For example: "FrontendName=frontend[00\-03] FrontendAddr=efrontend[00\-03]
+State=UNKNOWN" is used to define four front end nodes for running slurmd
+daemons.
+
.LP
The partition configuration permits you to establish different job
limits or access controls for various groups (or partitions) of nodes.
@@ -2488,6 +2638,8 @@
in the configuration file with multiple entries where "PartitionName=DEFAULT".
The "PartitionName=" specification must be placed on every line
describing the configuration of partitions.
+A single partition name can not appear as a PartitionName value in more than
+one line (duplicate partition name records will be ignored).
If a partition that is in use is deleted from the configuration and slurm
is restarted or reconfigured (scontrol reconfigure), jobs using the partition
are canceled.
@@ -2688,6 +2840,14 @@
priority.
The value may not exceed 65533.
+.TP
+\fBReqResv\fR
+Specifies users of this partition are required to designate a reservation
+when submitting a job. This option can be useful in restricting usage
+of a partition that may have higher priority or additional resources to be
+allowed only within a reservation.
+Possible values are "YES" and "NO".
+The default value is "NO".
.TP
\fBRootOnly\fR
@@ -3222,7 +3382,6 @@
}
.br
-
.SH "COPYING"
Copyright (C) 2002\-2007 The Regents of the University of California.
Copyright (C) 2008\-2010 Lawrence Livermore National Security.
diff --git a/doc/man/man5/slurmdbd.conf.5 b/doc/man/man5/slurmdbd.conf.5
index bfcf88c..5177056 100644
--- a/doc/man/man5/slurmdbd.conf.5
+++ b/doc/man/man5/slurmdbd.conf.5
@@ -1,4 +1,4 @@
-.TH "slurmdbd.conf" "5" "August 2010" "slurmdbd.conf 2.2" "Slurm configuration file"
+.TH "slurmdbd.conf" "5" "March 2012" "slurmdbd.conf 2.4" "Slurm configuration file"
.SH "NAME"
slurmdbd.conf \- Slurm Database Daemon (SlurmDBD) configuration file
@@ -18,6 +18,9 @@
.LP
This file should be only on the computer where SlurmDBD executes and
should only be readable by the user which executes SlurmDBD (e.g. "slurm").
+If the slurmdbd daemon is started as user root and changes to another
+user ID, the configuration file will initially be read as user root, but will
+be read as the other user ID in response to a SIGHUP signal.
This file should be protected from unauthorized access since it
contains a database password.
The overall configuration parameters available include:
@@ -26,8 +29,8 @@
\fBArchiveDir\fR
If ArchiveScript is not set the slurmdbd will generate a file that can be
read in anytime with sacctmgr load filename. This directory is where the
-file will be placed archive has ran. Default is /tmp. The format for this
-files name is
+file will be placed after a purge event has happened and archive for that
+element is set to true. Default is /tmp. The format for this file's name is
.na
$ArchiveDir/$ClusterName_$ArchiveObject_archive_$BeginTimeStamp_$endTimeStamp
.ad
@@ -114,8 +117,8 @@
"http://www.theether.org/authd/" for more information).
"auth/munge" indicates that LLNL's Munge system is to be used
(this is the best supported authentication mechanism for SLURM,
-see "http://home.gna.org/munge/" for more information).
-SlurmDbd must be terminated prior to changing the value of \fBAuthType\fR
+see "https://code.google.com/p/munge/" for more information).
+SlurmDBD must be terminated prior to changing the value of \fBAuthType\fR
and later restarted.
.TP
@@ -145,9 +148,39 @@
.TP
\fBDebugLevel\fR
The level of detail to provide the Slurm Database Daemon's logs.
-Values from 0 to 9 are legal, with `0' being "quiet" operation and
-`9' being insanely verbose.
-The default value is 3.
+The default value is \fBinfo\fR.
+.RS
+.TP 10
+\fBquiet\fR
+Log nothing
+.TP
+\fBfatal\fR
+Log only fatal errors
+.TP
+\fBerror\fR
+Log only errors
+.TP
+\fBinfo\fR
+Log errors and general informational messages
+.TP
+\fBverbose\fR
+Log errors and verbose informational messages
+.TP
+\fBdebug\fR
+Log errors and verbose informational messages and debugging messages
+.TP
+\fBdebug2\fR
+Log errors and verbose informational messages and more debugging messages
+.TP
+\fBdebug3\fR
+Log errors and verbose informational messages and even more debugging messages
+.TP
+\fBdebug4\fR
+Log errors and verbose informational messages and even more debugging messages
+.TP
+\fBdebug5\fR
+Log errors and verbose informational messages and even more debugging messages
+.RE
.TP
\fBDefaultQOS\fR
diff --git a/doc/man/man5/topology.conf.5 b/doc/man/man5/topology.conf.5
index 7bae1d7d..dd046ab 100644
--- a/doc/man/man5/topology.conf.5
+++ b/doc/man/man5/topology.conf.5
@@ -1,4 +1,4 @@
-.TH "topology.conf" "5" "March 2009" "topology.conf 2.0" "Slurm configuration file"
+.TH "topology.conf" "5" "December 2011" "topology.conf 2.0" "Slurm configuration file"
.SH "NAME"
topology.conf \- Slurm configuration file for defining the network topology
@@ -26,6 +26,10 @@
and "Switches=s[0\-2,4\-8,12]" will parse fine).
An optional link speed may also be specified.
.LP
+The \fBtopology.conf\fP file for an Infiniband switch can be automatically
+generated using the \fBib2slurm\fP tool found here:
+<https://github.com/fintler/ib2slurm>.
+.LP
The overall configuration parameters available include:
.TP
diff --git a/doc/man/man8/Makefile.am b/doc/man/man8/Makefile.am
new file mode 100644
index 0000000..7334555
--- /dev/null
+++ b/doc/man/man8/Makefile.am
@@ -0,0 +1,29 @@
+htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html
+
+man8_MANS = slurmctld.8 \
+ slurmd.8 \
+ slurmdbd.8 \
+ slurmstepd.8 \
+ spank.8
+
+EXTRA_DIST = $(man8_MANS)
+
+if HAVE_MAN2HTML
+
+html_DATA = \
+ slurmctld.html \
+ slurmd.html \
+ slurmdbd.html \
+ slurmstepd.html \
+ spank.html
+
+MOSTLYCLEANFILES = ${html_DATA}
+
+EXTRA_DIST += $(html_DATA)
+
+SUFFIXES = .html
+
+.8.html:
+ `dirname $<`/../man2html.py $(srcdir)/../../html/header.txt $(srcdir)/../../html/footer.txt $<
+
+endif
diff --git a/doc/man/man8/Makefile.in b/doc/man/man8/Makefile.in
new file mode 100644
index 0000000..18ef523
--- /dev/null
+++ b/doc/man/man8/Makefile.in
@@ -0,0 +1,606 @@
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@HAVE_MAN2HTML_TRUE@am__append_1 = $(html_DATA)
+subdir = doc/man/man8
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
+ $(top_srcdir)/auxdir/libtool.m4 \
+ $(top_srcdir)/auxdir/ltoptions.m4 \
+ $(top_srcdir)/auxdir/ltsugar.m4 \
+ $(top_srcdir)/auxdir/ltversion.m4 \
+ $(top_srcdir)/auxdir/lt~obsolete.m4 \
+ $(top_srcdir)/auxdir/slurm.m4 \
+ $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \
+ $(top_srcdir)/auxdir/x_ac_affinity.m4 \
+ $(top_srcdir)/auxdir/x_ac_aix.m4 \
+ $(top_srcdir)/auxdir/x_ac_blcr.m4 \
+ $(top_srcdir)/auxdir/x_ac_bluegene.m4 \
+ $(top_srcdir)/auxdir/x_ac_cflags.m4 \
+ $(top_srcdir)/auxdir/x_ac_cray.m4 \
+ $(top_srcdir)/auxdir/x_ac_databases.m4 \
+ $(top_srcdir)/auxdir/x_ac_debug.m4 \
+ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \
+ $(top_srcdir)/auxdir/x_ac_elan.m4 \
+ $(top_srcdir)/auxdir/x_ac_env.m4 \
+ $(top_srcdir)/auxdir/x_ac_federation.m4 \
+ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \
+ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \
+ $(top_srcdir)/auxdir/x_ac_iso.m4 \
+ $(top_srcdir)/auxdir/x_ac_lua.m4 \
+ $(top_srcdir)/auxdir/x_ac_man2html.m4 \
+ $(top_srcdir)/auxdir/x_ac_munge.m4 \
+ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \
+ $(top_srcdir)/auxdir/x_ac_pam.m4 \
+ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \
+ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \
+ $(top_srcdir)/auxdir/x_ac_readline.m4 \
+ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \
+ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
+ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \
+ $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \
+ $(top_srcdir)/auxdir/x_ac_srun.m4 \
+ $(top_srcdir)/auxdir/x_ac_sun_const.m4 \
+ $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+man8dir = $(mandir)/man8
+am__installdirs = "$(DESTDIR)$(man8dir)" "$(DESTDIR)$(htmldir)"
+NROFF = nroff
+MANS = $(man8_MANS)
+DATA = $(html_DATA)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTHD_CFLAGS = @AUTHD_CFLAGS@
+AUTHD_LIBS = @AUTHD_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BGL_LOADED = @BGL_LOADED@
+BGQ_LOADED = @BGQ_LOADED@
+BG_INCLUDES = @BG_INCLUDES@
+BG_LDFLAGS = @BG_LDFLAGS@
+BG_L_P_LOADED = @BG_L_P_LOADED@
+BLCR_CPPFLAGS = @BLCR_CPPFLAGS@
+BLCR_HOME = @BLCR_HOME@
+BLCR_LDFLAGS = @BLCR_LDFLAGS@
+BLCR_LIBS = @BLCR_LIBS@
+BLUEGENE_LOADED = @BLUEGENE_LOADED@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CMD_LDFLAGS = @CMD_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DL_LIBS = @DL_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ELAN_LIBS = @ELAN_LIBS@
+EXEEXT = @EXEEXT@
+FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@
+FGREP = @FGREP@
+GREP = @GREP@
+GTK_CFLAGS = @GTK_CFLAGS@
+GTK_LIBS = @GTK_LIBS@
+HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@
+HAVEPGCONFIG = @HAVEPGCONFIG@
+HAVE_AIX = @HAVE_AIX@
+HAVE_ELAN = @HAVE_ELAN@
+HAVE_FEDERATION = @HAVE_FEDERATION@
+HAVE_MAN2HTML = @HAVE_MAN2HTML@
+HAVE_OPENSSL = @HAVE_OPENSSL@
+HAVE_SOME_CURSES = @HAVE_SOME_CURSES@
+HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@
+HWLOC_LDFLAGS = @HWLOC_LDFLAGS@
+HWLOC_LIBS = @HWLOC_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_LDFLAGS = @LIB_LDFLAGS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
+MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
+MUNGE_LIBS = @MUNGE_LIBS@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_LIBS = @MYSQL_LIBS@
+NCURSES = @NCURSES@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NUMA_LIBS = @NUMA_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PAM_DIR = @PAM_DIR@
+PAM_LIBS = @PAM_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PROCTRACKDIR = @PROCTRACKDIR@
+PROJECT = @PROJECT@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+READLINE_LIBS = @READLINE_LIBS@
+REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
+RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
+SED = @SED@
+SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SLURMCTLD_PORT = @SLURMCTLD_PORT@
+SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@
+SLURMDBD_PORT = @SLURMDBD_PORT@
+SLURMD_PORT = @SLURMD_PORT@
+SLURM_API_AGE = @SLURM_API_AGE@
+SLURM_API_CURRENT = @SLURM_API_CURRENT@
+SLURM_API_MAJOR = @SLURM_API_MAJOR@
+SLURM_API_REVISION = @SLURM_API_REVISION@
+SLURM_API_VERSION = @SLURM_API_VERSION@
+SLURM_MAJOR = @SLURM_MAJOR@
+SLURM_MICRO = @SLURM_MICRO@
+SLURM_MINOR = @SLURM_MINOR@
+SLURM_PREFIX = @SLURM_PREFIX@
+SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@
+SLURM_VERSION_STRING = @SLURM_VERSION_STRING@
+SO_LDFLAGS = @SO_LDFLAGS@
+SSL_CPPFLAGS = @SSL_CPPFLAGS@
+SSL_LDFLAGS = @SSL_LDFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+UTIL_LIBS = @UTIL_LIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+ac_have_man2html = @ac_have_man2html@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = ${prefix}/share/doc/@PACKAGE@-@VERSION@/html
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lua_CFLAGS = @lua_CFLAGS@
+lua_LIBS = @lua_LIBS@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+man8_MANS = slurmctld.8 \
+ slurmd.8 \
+ slurmdbd.8 \
+ slurmstepd.8 \
+ spank.8
+
+EXTRA_DIST = $(man8_MANS) $(am__append_1)
+@HAVE_MAN2HTML_TRUE@html_DATA = \
+@HAVE_MAN2HTML_TRUE@ slurmctld.html \
+@HAVE_MAN2HTML_TRUE@ slurmd.html \
+@HAVE_MAN2HTML_TRUE@ slurmdbd.html \
+@HAVE_MAN2HTML_TRUE@ slurmstepd.html \
+@HAVE_MAN2HTML_TRUE@ spank.html
+
+@HAVE_MAN2HTML_TRUE@MOSTLYCLEANFILES = ${html_DATA}
+@HAVE_MAN2HTML_TRUE@SUFFIXES = .html
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .html .8
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu doc/man/man8/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu doc/man/man8/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-man8: $(man8_MANS)
+ @$(NORMAL_INSTALL)
+ test -z "$(man8dir)" || $(MKDIR_P) "$(DESTDIR)$(man8dir)"
+ @list='$(man8_MANS)'; test -n "$(man8dir)" || exit 0; \
+ { for i in $$list; do echo "$$i"; done; \
+ } | while read p; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; echo "$$p"; \
+ done | \
+ sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^8][0-9a-z]*$$,8,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+ sed 'N;N;s,\n, ,g' | { \
+ list=; while read file base inst; do \
+ if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+ echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man8dir)/$$inst'"; \
+ $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man8dir)/$$inst" || exit $$?; \
+ fi; \
+ done; \
+ for i in $$list; do echo "$$i"; done | $(am__base_list) | \
+ while read files; do \
+ test -z "$$files" || { \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man8dir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(man8dir)" || exit $$?; }; \
+ done; }
+
+uninstall-man8:
+ @$(NORMAL_UNINSTALL)
+ @list='$(man8_MANS)'; test -n "$(man8dir)" || exit 0; \
+ files=`{ for i in $$list; do echo "$$i"; done; \
+ } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^8][0-9a-z]*$$,8,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+ dir='$(DESTDIR)$(man8dir)'; $(am__uninstall_files_from_dir)
+install-htmlDATA: $(html_DATA)
+ @$(NORMAL_INSTALL)
+ test -z "$(htmldir)" || $(MKDIR_P) "$(DESTDIR)$(htmldir)"
+ @list='$(html_DATA)'; test -n "$(htmldir)" || list=; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \
+ done
+
+uninstall-htmlDATA:
+ @$(NORMAL_UNINSTALL)
+ @list='$(html_DATA)'; test -n "$(htmldir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(htmldir)'; $(am__uninstall_files_from_dir)
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+ @list='$(MANS)'; if test -n "$$list"; then \
+ list=`for p in $$list; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \
+ if test -n "$$list" && \
+ grep 'ab help2man is required to generate this page' $$list >/dev/null; then \
+ echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \
+ grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \
+ echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \
+ echo " typically \`make maintainer-clean' will remove them" >&2; \
+ exit 1; \
+ else :; fi; \
+ else :; fi
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(MANS) $(DATA)
+installdirs:
+ for dir in "$(DESTDIR)$(man8dir)" "$(DESTDIR)$(htmldir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+ -test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-htmlDATA install-man
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man: install-man8
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-htmlDATA uninstall-man
+
+uninstall-man: uninstall-man8
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+ distclean distclean-generic distclean-libtool distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-htmlDATA install-info install-info-am install-man \
+ install-man8 install-pdf install-pdf-am install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+ ps ps-am uninstall uninstall-am uninstall-htmlDATA \
+ uninstall-man uninstall-man8
+
+
+@HAVE_MAN2HTML_TRUE@.8.html:
+@HAVE_MAN2HTML_TRUE@ `dirname $<`/../man2html.py $(srcdir)/../../html/header.txt $(srcdir)/../../html/footer.txt $<
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/doc/man/man8/slurmctld.8 b/doc/man/man8/slurmctld.8
index 2828f07..4fd3cc0 100644
--- a/doc/man/man8/slurmctld.8
+++ b/doc/man/man8/slurmctld.8
@@ -75,8 +75,10 @@
written to the current working directory.
Otherwise if \fBSlurmctldLogFile\fR is a fully qualified path name (starting
with a slash), the core file will be written to the same directory as the
-log file.
-Otherwise the core file will be written to the \fBStateSaveLocation\fR.
+log file, provided SlurmUser has write permission on the directory.
+Otherwise the core file will be written to the \fBStateSaveLocation\fR,
+or "/var/tmp/" as a last resort. If none of the above directories have
+write permission for SlurmUser, no core file will be produced.
The command "scontrol abort" can be used to abort the slurmctld daemon and
generate a core file.
diff --git a/doc/man/man8/slurmd.8 b/doc/man/man8/slurmd.8
index 9852731..c025930 100644
--- a/doc/man/man8/slurmd.8
+++ b/doc/man/man8/slurmd.8
@@ -1,4 +1,4 @@
-.TH SLURMD "8" "March 2010" "slurmd 2.2" "Slurm components"
+.TH SLURMD "8" "March 2012" "slurmd 2.4" "Slurm components"
.SH "NAME"
slurmd \- The compute node daemon for SLURM.
@@ -20,7 +20,7 @@
.TP
\fB\-C\fR
Print actual hardware configuration and exit. The format of output is the same
-as used in \fBslurm.conf\fR to describe a node's configuration.
+as used in \fBslurm.conf\fR to describe a node's configuration plus its uptime.
.TP
\fB\-d <file>\fR
@@ -72,6 +72,16 @@
The location of the SLURM configuration file. This is overridden by
explicitly naming a configuration file on the command line.
+.SH "CORE FILE LOCATION"
+If slurmd is started with the \fB\-D\fR option then the core file will be
+written to the current working directory.
+Otherwise if \fBSlurmdLogFile\fR is a fully qualified path name
+(starting with a slash), the core file will be written to the same
+directory as the log file. Otherwise the core file will be written to
+the \fBSlurmSpoolDir\fR directory, or "/var/tmp/" as a last resort. If
+none of the above directories can be written, no core file will be
+produced.
+
.SH "NOTES"
It may be useful to experiment with different \fBslurmd\fR specific
configuration parameters using a distinct configuration file
diff --git a/doc/man/man8/slurmdbd.8 b/doc/man/man8/slurmdbd.8
index 077bdfa..2dabcb9 100644
--- a/doc/man/man8/slurmdbd.8
+++ b/doc/man/man8/slurmdbd.8
@@ -26,6 +26,16 @@
\fB\-V\fR
Print version information and exit.
+.SH "CORE FILE LOCATION"
+If slurmdbd is started with the \fB\-D\fR option then the core file will be
+written to the current working directory.
+Otherwise if \fBLogFile\fR in "slurmdbd.conf" is a fully qualified
+path name (starting with a slash), the core file will be written to
+the same directory as the log file, provided SlurmUser has write
+permission on the directory. Otherwise the core file will be written
+to "/var/tmp/" as a last resort. If neither of the above directories
+has write permission for SlurmUser, no core file will be produced.
+
.SH "NOTES"
It may be useful to experiment with different \fBslurmctld\fR specific
configuration parameters using a distinct configuration file
diff --git a/doc/man/man8/spank.8 b/doc/man/man8/spank.8
index 233edff..fc39301 100644
--- a/doc/man/man8/spank.8
+++ b/doc/man/man8/spank.8
@@ -20,7 +20,7 @@
.LP
.SH "SPANK PLUGINS"
-\fBSPANK\fR plugins are loaded in up to three separate contexts during a
+\fBSPANK\fR plugins are loaded in up to five separate contexts during a
\fBSLURM\fR job. Briefly, the three contexts are:
.TP 8
\fBlocal\fB
@@ -28,16 +28,36 @@
part of a parallel job).
.TP
\fBremote\fR
-In \fBremote\fR context, the plugin is loaded by \fBslurmd\fR. (i.e. the "remote"
+In \fBremote\fR context, the plugin is loaded by \fBslurmstepd\fR. (i.e. the "remote"
part of a parallel job).
.TP
\fBallocator\fR
In \fBallocator\fR context, the plugin is loaded in one of the job allocation
utilities \fBsbatch\fR or \fBsalloc\fR.
.LP
+.TP
+\fBslurmd\fR In \fBslurmd\fR context, the plugin is loaded in the
+\fBslurmd\fR daemon itself. \fBNote\fR: Plugins loaded in slurmd context
+persist for the entire time slurmd is running, so if configuration is
+changed or plugins are updated, slurmd must be restarted for the changes
+to take effect.
+.LP
+.TP
+\fBjob_script\fR
+In the \fBjob_script\fR context, plugins are loaded in the context of the
+job prolog or epilog. \fBNote\fR: Plugins are loaded in \fBjob_script\fR
+context on each run of the job prolog or epilog, in a separate address
+space from plugins in \fBslurmd\fR context. This means there is no
+state shared between this context and other contexts, or even between
+one call to \fBslurm_spank_job_prolog\fR or \fBslurm_spank_job_epilog\fR
+and subsequent calls.
+.LP
In local context, only the \fBinit\fR, \fBexit\fR, \fBinit_post_opt\fR, and
\fBuser_local_init\fR functions are called. In allocator context, only the
\fBinit\fR, \fBexit\fR, and \fBinit_post_opt\fR functions are called.
+Similarly, in slurmd context, only the \fBslurmd_init\fR and \fBslurmd_exit\fR
+callbacks are active, and in the job_script context, only the \fBjob_prolog\fR
+and \fBjob_epilog\fR callbacks are used.
Plugins may query the context in which they are running with the
\fBspank_context\fR and \fBspank_remote\fR functions defined in
\fB<slurm/spank.h>\fR.
@@ -48,7 +68,13 @@
\fBslurm_spank_init\fR
Called just after plugins are loaded. In remote context, this is just
after job step is initialized. This function is called before any plugin
-option processing.
+option processing. This function is not called in slurmd context.
+.TP
+\fBslurm_spank_slurmd_init\fR
+Called in slurmd just after the daemon is started.
+.TP
+\fBslurm_spank_job_prolog\fR
+Called at the same time as the job prolog.
.TP
\fBslurm_spank_init_post_opt\fR
Called at the same point as \fBslurm_spank_init\fR, but after all
@@ -88,6 +114,12 @@
\fBslurm_spank_exit\fR
Called once just before \fBslurmstepd\fR exits in remote context.
In local context, called before \fBsrun\fR exits.
+.TP
+\fBslurm_spank_job_epilog\fR
+Called at the same time as the job epilog.
+.TP
+\fBslurm_spank_slurmd_exit\fR
+Called in slurmd when the daemon is shut down.
.LP
All of these functions have the same prototype, for example:
.nf
@@ -311,6 +343,22 @@
handler is called. This allows plugins to modify behavior of all plugin
functionality based on the value of user\-provided options.
(See EXAMPLES below for a plugin that registers an option with \fBSLURM\fR).
+.LP
+As an alternative to use of an option callback and global variable,
+plugins can use the \fBspank_option_getopt\fR function to check for
+supplied options after option processing. This function has the prototype:
+.nf
+
+ spank_err_t spank_option_getopt(spank_t sp,
+ struct spank_option *opt, char **optargp);
+
+.fi
+This function returns \fBESPANK_SUCCESS\fR if the option defined in the
+struct spank_option \fIopt\fR has been used by the user. If \fIoptargp\fR
+is non-NULL then it is set to any option argument passed (if the option
+takes an argument). The use of this method is \fIrequired\fR to process
+options in \fBjob_script\fR context (\fBslurm_spank_job_prolog\fR and
+\fBslurm_spank_job_epilog\fR).
.SH "CONFIGURATION"
.LP
diff --git a/etc/bluegene.conf.example b/etc/bluegene.conf.example
index f94fe38..043b137 100644
--- a/etc/bluegene.conf.example
+++ b/etc/bluegene.conf.example
@@ -1,30 +1,28 @@
###############################################################################
# Global specifications for BlueGene system
+# NOTE: BlueGene/L, P, and Q systems require different Image specifications.
+# See "man bluegene.conf" for details. This example is for a BGQ system.
+# Look at older <=2.3 for examples of L or P
#
-# BlrtsImage: BlrtsImage used for creation of all bgblocks.
-# LinuxImage: LinuxImage used for creation of all bgblocks.
# MloaderImage: MloaderImage used for creation of all bgblocks.
-# RamDiskImage: RamDiskImage used for creation of all bgblocks.
#
-# You may add extra images which a user can specify from the srun
-# command line (see man srun). When adding these images you may also add
-# a Groups= at the end of the image path to specify which groups can
+# You may add extra images which a user can specify from the srun
+# command line (see man srun). When adding these images you may also add
+# a Groups= at the end of the image path to specify which groups can
# use the image.
#
-# AltBlrtsImage: Alternative BlrtsImage(s).
-# AltLinuxImage: Alternative LinuxImage(s).
# AltMloaderImage: Alternative MloaderImage(s).
-# AltRamDiskImage: Alternative RamDiskImage(s).
#
+# AllowSubBlockAllocation: This option allows multiple users to
+# run jobs as small as 1 cnode in size on a block
+# one midplane in size and smaller.
# LayoutMode: Mode in which slurm will create blocks:
# STATIC: Use defined non-overlapping bgblocks
# OVERLAP: Use defined bgblocks, which may overlap
# DYNAMIC: Create bgblocks as needed for each job
-# BasePartitionNodeCnt: Number of c-nodes per base partition.
+# MidplaneNodeCnt: Number of c-nodes per midplane.
# NodeCardNodeCnt: Number of c-nodes per node card.
-# NumPsets: The Numpsets used for creation of all bgblocks
-# equals this value multiplied by the number of
-# base partitions in the bgblock.
+# IONodesPerMP: Number of I/O nodes per midplane.
#
# BridgeAPILogFile : Pathname of file in which to write the Bridge
# API logs.
@@ -35,77 +33,69 @@
# 3: Log level 2 and more debug message
# 4: Log all messages
# DenyPassthrough: Prevents use of passthrough ports in specific
-# dimensions, X, Y, and/or Z, plus ALL
-#
-# NOTE: The bgl_serial value is set at configuration time using the
-# "--with-bgl-serial=" option. Its default value is "BGL".
+# dimensions, A, X, Y, and/or Z, plus ALL
+#
###############################################################################
-# These are the default images with are used if the user doesn't specify
+# These are the default images which are used if the user doesn't specify
# which image they want
-BlrtsImage=/bgl/BlueLight/ppcfloor/bglsys/bin/rts_hw.rts
-LinuxImage=/bgl/BlueLight/ppcfloor/bglsys/bin/zImage.elf
-MloaderImage=/bgl/BlueLight/ppcfloor/bglsys/bin/mmcs-mloader.rts
-RamDiskImage=/bgl/BlueLight/ppcfloor/bglsys/bin/ramdisk.elf
+MloaderImage=/bgsys/drivers/ppcfloor/boot/firmware
-#Only group jette can use these images
-AltBlrtsImage=/bgl/BlueLight/ppcfloor/bglsys/bin/rts_hw2.rts Groups=jette
-AltLinuxImage=/bgl/BlueLight/ppcfloor/bglsys/bin/zImage2.elf Groups=jette
-AltMloaderImage=/bgl/BlueLight/ppcfloor/bglsys/bin/mmcs-mloader2.rts Groups=jette
-AltRamDiskImage=/bgl/BlueLight/ppcfloor/bglsys/bin/ramdisk2.elf Groups=jette
+#Only group jette can use this image
+AltMloaderImage=/bgsys/drivers/ppcfloor/boot/firmware2 Groups=jette
# Since no groups are specified here any user can use them
-AltBlrtsImage=/bgl/BlueLight/ppcfloor/bglsys/bin/rts_hw3.rts
-AltLinuxImage=/bgl/BlueLight/ppcfloor/bglsys/bin/zImage3.elf
-AltMloaderImage=/bgl/BlueLight/ppcfloor/bglsys/bin/mmcs-mloader3.rts
-AltRamDiskImage=/bgl/BlueLight/ppcfloor/bglsys/bin/ramdisk3.elf
+AltMloaderImage=/bgsys/drivers/ppcfloor/boot/firmware3
# Another option for images would be a "You can use anything you like image" *
# This allows the user to use any image entered with no security checking
-AltBlrtsImage=* Groups=da,adamb
-AltLinuxImage=* Groups=da,adamb
AltMloaderImage=* Groups=da,adamb
-AltRamDiskImage=* Groups=da,adamb
LayoutMode=STATIC
-BasePartitionNodeCnt=512
+#LayoutMode=Dynamic #suggested as default
+MidplaneNodeCnt=512
NodeCardNodeCnt=32
-#Numpsets=8 #used for IO poor systems (Can't create 32 cnode blocks)
-Numpsets=64 #used for IO rich systems
+IONodesPerMP=4 #used for IO poor systems (Can't create 32 c-node blocks, 128 is the smallest)
+#IONodesPerMP=16 #used for IO rich systems
BridgeAPILogFile=/var/log/slurm/bridgeapi.log
BridgeAPIVerbose=0
-#DenyPassthrough=X,Y,Z
+#DenyPassthrough=A,X,Y,Z
+
+AllowSubBlockAllocation=yes
###############################################################################
-# Define the static/overlap partitions (bgblocks)
+# Define the static/overlap blocks. This is ignored and is not needed if
+# using dynamic layout mode.
#
-# Nodes: The base partitions (midplanes) in the bgblock using XYZ coordinates
-# Type: Connection type "MESH" or "TORUS" or "SMALL", default is "TORUS"
+# This example assumes NodeName=bg[0000x3233] in the slurm.conf file.
+#
+# MPs: The midplanes in the block using AXYZ coordinates
+# Type: Connection type "MESH" or "TORUS" or "SMALL", default is "TORUS"
# Type SMALL will divide a midplane into multiple bgblock
# based on options NodeCards (count of single node card bglblocks)
-# and Quarters (count of quarter midplane bglblocks) to determine
-# type of small blocks.
-#
-# IMPORTANT NOTES:
+# and Quarters (count of quarter midplane bglblocks) to determine
+# type of small blocks. With Q you can have each dimension be a
+# different type. T,T,M,T means torus in all dimensions except the
+# Y dim.
+#
+# IMPORTANT NOTES:
# * Ordering is very important for laying out switch wires. Please create
# blocks with smap, and once done don't change the order of blocks created.
-# * A bgblock is implicitly created containing all resources on the system
-# * Bgblocks must not overlap in static mode (except for implicitly
-# created bgblock). This will be the case when smap is used to create
+# * A block is implicitly created containing all resources on the system
+# * Blocks must not overlap in static mode (except for implicitly
+# created bgblock). This will be the case when smap is used to create
# a configuration file.
# * All Nodes defined here must also be defined in the slurm.conf file
-# * Define only the numeric coordinates of the bgblocks here. The prefix
+# * Define only the numeric coordinates of the blocks here. The prefix
# will be based upon the NodeName defined in slurm.conf.
###############################################################################
# LEAVE NEXT LINE AS A COMMENT, Full-system bgblock, implicitly created
-# BPs=[000x333] Type=TORUS # 4x4x4 = 64 midplanes
+# MPs=[0000x0033] Type=TORUS # 4x3x4x4 = 192 midplanes
###############################################################################
-# smap bgblock layout here:
-BPs=[000x133] Type=TORUS # 2x4x4 = 32
-BPs=[200x233] Type=TORUS # 1x4x4 = 16
-BPs=[300x313] Type=TORUS # 1x2x4 = 8
-BPs=[320x323] Type=TORUS # 1x1x4 = 4
-BPs=[330x331] Type=TORUS # 1x1x2 = 2
-BPs=[332x332] Type=TORUS # 1x1x1 = 1
-BPs=[333x333] Type=SMALL NodeCards=4 Quarters=3 # 1x1x1 = 4-32 c-node blocks 3-128 c-node blocks
+# smap block layout here:
+MPs=[0000x0013] Type=T,T,M,T # 1x1x2x4 = 8 midplanes all dims in torus except Y
+MPs=[0020x0023] Type=TORUS # 1x1x1x4 = 4 midplanes all in torus
+MPs=[0030x0031] Type=TORUS # 1x1x1x2 = 2 midplanes all in torus
+MPs=[0032] Type=TORUS # 1 midplane (always will be in TORUS no matter what)
+MPs=[0033] Type=SMALL 256cnblocks=1 128cnblocks=2 # 1-256 c-node block 2-128 c-node blocks
diff --git a/etc/cgroup.conf.example b/etc/cgroup.conf.example
index 816c182..9012666 100644
--- a/etc/cgroup.conf.example
+++ b/etc/cgroup.conf.example
@@ -6,7 +6,7 @@
# information on cgroup configuration parameters
#--
CgroupAutomount=yes
-CgroupReleaseAgent="/etc/slurm/cgroup"
+CgroupReleaseAgentDir="/etc/slurm/cgroup"
ConstrainCores=no
ConstrainRAMSpace=no
diff --git a/etc/cgroup.release_common.example b/etc/cgroup.release_common.example
index f431d26..94acd9f 100644
--- a/etc/cgroup.release_common.example
+++ b/etc/cgroup.release_common.example
@@ -19,10 +19,10 @@
get_mount_dir()
{
local lssubsys=$(type -p lssubsys)
- if [ -x $lssubsys ]; then
+ if [[ $lssubsys ]]; then
$lssubsys -m $subsystem | awk '{print $2}'
else
- awk "/release_agent=$0/ { print \$2 }"
+ echo "/cgroup/$subsystem"
fi
}
diff --git a/etc/init.d.slurm b/etc/init.d.slurm.in
similarity index 98%
rename from etc/init.d.slurm
rename to etc/init.d.slurm.in
index 6f5b025..ec5d534 100644
--- a/etc/init.d.slurm
+++ b/etc/init.d.slurm.in
@@ -25,10 +25,10 @@
# Description: Start slurm to provide resource management
### END INIT INFO
-BINDIR=/usr/bin
-CONFDIR=/etc/slurm
-LIBDIR=/usr/lib
-SBINDIR=/usr/sbin
+BINDIR="@bindir@"
+CONFDIR="@sysconfdir@"
+LIBDIR="@libdir@"
+SBINDIR="@sbindir@"
# Source function library.
if [ -f /etc/rc.status ]; then
@@ -55,11 +55,6 @@
RETVAL=0
fi
-if [ ! -x $BINDIR/scontrol ]; then
- echo "Could not find $BINDIR/scontrol. Bad path?"
- exit 1
-fi
-
# We can not use a starter program without losing environment
# variables that are critical on Blue Gene systems
if [ -d /bgl/BlueLight/ppcfloor ]; then
@@ -80,6 +75,11 @@
SLURMD_OPTIONS=""
fi
+if [ ! -x $BINDIR/scontrol ]; then
+ echo "Could not find $BINDIR/scontrol. Bad path?"
+ exit 1
+fi
+
if [ ! -f $CONFDIR/slurm.conf ]; then
echo "Could not find $CONFDIR/slurm.conf. Bad path?"
exit 1
diff --git a/etc/init.d.slurmdbd b/etc/init.d.slurmdbd.in
similarity index 97%
rename from etc/init.d.slurmdbd
rename to etc/init.d.slurmdbd.in
index 3ea4b51..62ecf8a 100755
--- a/etc/init.d.slurmdbd
+++ b/etc/init.d.slurmdbd.in
@@ -21,9 +21,9 @@
# Description: Start slurm to provide database server for SLURM
### END INIT INFO
-CONFDIR=/etc/slurm
-SBINDIR=/usr/sbin
-LIBDIR=/usr/lib
+CONFDIR="@sysconfdir@"
+LIBDIR="@libdir@"
+SBINDIR="@sbindir@"
#Source function library.
if [ -f /etc/rc.status ]; then
diff --git a/etc/slurm.epilog.clean b/etc/slurm.epilog.clean
index 60df570..e829554 100644
--- a/etc/slurm.epilog.clean
+++ b/etc/slurm.epilog.clean
@@ -8,10 +8,10 @@
# SLURM_BIN can be used for testing with private version of SLURM
#SLURM_BIN="/usr/bin/"
#
-if [ x$SLURM_UID == "x" ] ; then
+if [ x$SLURM_UID = "x" ] ; then
exit 0
fi
-if [ x$SLURM_JOB_ID == "x" ] ; then
+if [ x$SLURM_JOB_ID = "x" ] ; then
exit 0
fi
diff --git a/slurm.spec b/slurm.spec
index f788e1c..553817c 100644
--- a/slurm.spec
+++ b/slurm.spec
@@ -5,12 +5,14 @@
#
# build options .rpmmacros options change to default action
# =============== ==================== ========================
+# --enable-salloc-background %_with_salloc_background 1 on a cray system alloc salloc to execute as a background process.
+# --prefix %_prefix path install path for commands, libraries, etc.
# --with aix %_with_aix 1 build aix RPM
# --with authd %_with_authd 1 build auth-authd RPM
# --with auth_none %_with_auth_none 1 build auth-none RPM
# --with blcr %_with_blcr 1 require blcr support
# --with bluegene %_with_bluegene 1 build bluegene RPM
-# --with cray_xt %_with_cray_xt 1 build for Cray XT system
+# --with cray %_with_cray 1 build for a Cray system
# --with debug %_with_debug 1 enable extra debugging within SLURM
# --with elan %_with_elan 1 build switch-elan RPM
# --with lua %_with_lua 1 build SLURM lua bindings (proctrack only for now)
@@ -23,7 +25,6 @@
# --with sgijob %_with_sgijob 1 build proctrack-sgi-job RPM
# --with sun_const %_with_sun_const 1 build for Sun Constellation system
# --with-srun2aprun %_with_srun2aprun 1 build srun as aprun wrapper
-
#
# Allow defining --with and --without build options or %_with and %without in .rpmmacors
# slurm_with builds option by default unless --without is specified
@@ -45,6 +46,7 @@
%slurm_without_opt elan
%slurm_without_opt sun_const
%slurm_without_opt srun2aprun
+%slurm_without_opt salloc_background
# These options are only here to force there to be these on the build.
# If they are not set they will still be compiled if the packages exist.
@@ -87,6 +89,10 @@
%slurm_with_opt sgijob
%endif
+%if %{slurm_with cray}
+%slurm_with_opt sgijob
+%endif
+
Name: see META file
Version: see META file
Release: see META file
@@ -146,6 +152,11 @@
BuildRequires: postgresql-devel >= 8.0.0
%endif
+%if %{slurm_with cray}
+BuildRequires: cray-MySQL-devel-enterprise
+Requires: cray-MySQL-devel-enterprise
+%endif
+
%ifnos aix5.3
# FIXME: AIX can't seem to find this even though this is in existance there.
# We should probably figure out a better way of doing this, but for now we
@@ -383,7 +394,6 @@
%build
%configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \
- %{?slurm_with_cray_xt:--enable-cray-xt} \
%{?slurm_with_debug:--enable-debug} \
%{?slurm_with_partial_attach:--enable-partial-attach} \
%{?slurm_with_sun_const:--enable-sun-const} \
@@ -398,6 +408,8 @@
%{?with_ssl} \
%{?with_munge} \
%{?with_blcr} \
+ %{?slurm_with_srun2aprun:--with-srun2aprun} \
+ %{?slurm_with_salloc_background:--enable-salloc-background} \
%{!?slurm_with_readline:--without-readline} \
%{?with_cflags}
@@ -417,6 +429,15 @@
install -D -m755 etc/init.d.slurmdbd $RPM_BUILD_ROOT/etc/init.d/slurmdbd
fi
%endif
+
+%if %{slurm_with cray}
+ if [ -d /opt/modulefiles ]; then
+ install -D -m755 contribs/cray/opt_modulefiles_slurm $RPM_BUILD_ROOT/opt/modulefiles/slurm/opt_modulefiles_slurm
+ fi
+%else
+ rm -f contribs/cray/opt_modulefiles_slurm
+%endif
+
install -D -m644 etc/slurm.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/slurm.conf.example
install -D -m644 etc/cgroup.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/cgroup.conf.example
install -D -m755 etc/cgroup_allowed_devices_file.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/cgroup_allowed_devices_file.conf.example
@@ -469,6 +490,10 @@
rm -f ${RPM_BUILD_ROOT}%{_libexecdir}/slurm/cr_*
%endif
+%if ! %{slurm_with sgijob}
+rm -f ${RPM_BUILD_ROOT}%{_libdir}/slurm/proctrack_sgi_job.so
+%endif
+
# Build man pages that are generated directly by the tools
rm -f $RPM_BUILD_ROOT/%{_mandir}/man1/sjobexitmod.1
${RPM_BUILD_ROOT}%{_bindir}/sjobexitmod --roff > $RPM_BUILD_ROOT/%{_mandir}/man1/sjobexitmod.1
@@ -482,8 +507,9 @@
touch $LIST
test -f $RPM_BUILD_ROOT/etc/init.d/slurm &&
echo /etc/init.d/slurm >> $LIST
-test -f $RPM_BUILD_ROOT/%{_bindir}/sview &&
- echo %{_bindir}/sview >> $LIST
+
+test -f $RPM_BUILD_ROOT/opt/modulefiles/slurm/opt_modulefiles_slurm &&
+ echo /opt/modulefiles/slurm/opt_modulefiles_slurm >> $LIST
%if %{slurm_with aix}
install -D -m644 etc/federation.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/federation.conf.example
@@ -560,13 +586,7 @@
%files -f slurm.files
%defattr(-,root,root,0755)
-%doc AUTHORS
-%doc NEWS
-%doc README.rst
-%doc RELEASE_NOTES
-%doc DISCLAIMER
-%doc COPYING
-%doc doc/html
+%{_mandir}/../doc
%{_bindir}/s*
%{_sbindir}/slurmctld
%{_sbindir}/slurmd
@@ -589,6 +609,9 @@
%{_mandir}/man8/spank*
%dir %{_sysconfdir}
%dir %{_libdir}/slurm/src
+%if %{slurm_with cray}
+%dir /opt/modulefiles/slurm
+%endif
%config %{_sysconfdir}/slurm.conf.example
%config %{_sysconfdir}/cgroup.conf.example
%config %{_sysconfdir}/cgroup_allowed_devices_file.conf.example
@@ -703,6 +726,7 @@
%{_libdir}/slurm/gres_gpu.so
%{_libdir}/slurm/gres_nic.so
%{_libdir}/slurm/jobacct_gather_aix.so
+%{_libdir}/slurm/jobacct_gather_cgroup.so
%{_libdir}/slurm/jobacct_gather_linux.so
%{_libdir}/slurm/jobacct_gather_none.so
%{_libdir}/slurm/jobcomp_none.so
@@ -719,6 +743,7 @@
%{_libdir}/slurm/mpi_mvapich.so
%{_libdir}/slurm/mpi_none.so
%{_libdir}/slurm/mpi_openmpi.so
+%{_libdir}/slurm/mpi_pmi2.so
%{_libdir}/slurm/preempt_none.so
%{_libdir}/slurm/preempt_partition_prio.so
%{_libdir}/slurm/preempt_qos.so
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index b150130..39280ae 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -75,6 +75,12 @@
/* Define to 1 if emulating or running on Blue Gene/P system */
#undef HAVE_BGP
+/* Define to 1 if emulating or running on Blue Gene/L-P system */
+#undef HAVE_BG_L_P
+
+/* Define to 1 if emulating or running on Blue Gene/Q system */
+#undef HAVE_BGQ
+
/* Number of dimensions the system has */
#define SYSTEM_DIMENSIONS 1
#define HIGHEST_DIMENSIONS 5
@@ -349,6 +355,7 @@
SELECT_JOBDATA_BLOCK_NODE_CNT, /* data-> uint32_t block_cnode_cnt */
SELECT_JOBDATA_START_LOC, /* data-> uint16_t
* start_loc[SYSTEM_DIMENSIONS] */
+ SELECT_JOBDATA_USER_NAME, /* data-> char * user_name */
};
enum select_nodedata_type {
@@ -360,6 +367,10 @@
SELECT_NODEDATA_STR, /* data-> char * needs to be
* freed with xfree */
SELECT_NODEDATA_PTR, /* data-> select_nodeinfo_t *nodeinfo */
+ SELECT_NODEDATA_EXTRA_INFO, /* data-> char * needs to be
+ * freed with xfree */
+ SELECT_NODEDATA_RACK_MP, /* data-> char * needs to be
+ * freed with xfree */
};
enum select_print_mode {
@@ -489,8 +500,9 @@
NODE_STATE_FUTURE, /* node slot reserved for future use */
NODE_STATE_END /* last entry in table */
};
-#define NODE_STATE_BASE 0x00ff
-#define NODE_STATE_FLAGS 0xff00
+#define NODE_STATE_BASE 0x000f
+#define NODE_STATE_FLAGS 0xfff0
+#define NODE_STATE_CLOUD 0x0080 /* node comes from cloud */
#define NODE_RESUME 0x0100 /* Restore a DRAINED, DRAINING, DOWN
* or FAILING node to service (e.g.
* IDLE or ALLOCATED). Used in
@@ -514,6 +526,7 @@
* Values can be can be ORed */
#define SHOW_ALL 0x0001 /* Show info for "hidden" partitions */
#define SHOW_DETAIL 0x0002 /* Show detailed resource information */
+#define SHOW_DETAIL2 0x0004 /* Show batch script listing */
/* Define keys for ctx_key argument of slurm_step_ctx_get() */
enum ctx_keys {
@@ -576,6 +589,11 @@
#define PROP_PRIO_NICER 0x0002 /* Insure that user tasks have a nice
* value that is higher than slurmd */
+#define PRIORITY_FLAGS_ACCRUE_ALWAYS 0x0001 /* Flag to always accrue age
+ * priority to pending jobs ignoring
+ * dependencies or holds */
+
+
/*****************************************************************************\
* SLURM HOSTLIST FUNCTIONS
\*****************************************************************************/
@@ -1246,6 +1264,7 @@
} slurm_step_ctx_params_t;
typedef struct {
+ char *alias_list; /* node name/address/hostnamne aliases */
uint32_t argc;
char **argv;
uint32_t envc;
@@ -1366,8 +1385,8 @@
} job_step_stat_t;
typedef struct {
- uint32_t job_id;
- List stats_list; /* List of job_step_stat_t *'s */
+ uint32_t job_id; /* job ID */
+ List stats_list; /* List of job_step_stat_t *'s */
uint32_t step_id;
} job_step_stat_response_msg_t;
@@ -1456,6 +1475,7 @@
#define PART_FLAG_HIDDEN 0x0002 /* Set if partition is hidden */
#define PART_FLAG_NO_ROOT 0x0004 /* Set if user root jobs disabled */
#define PART_FLAG_ROOT_ONLY 0x0008 /* Set if only root can submit jobs */
+#define PART_FLAG_REQ_RESV 0x0010 /* Set if reservation is required */
/* Used with slurm_update_partition() to clear flags associated with existing
* partitions. For example, if a partition is currently hidden and you want
* to make it visible then set flags to PART_FLAG_HIDDEN_CLR and call
@@ -1464,6 +1484,7 @@
#define PART_FLAG_HIDDEN_CLR 0x0200 /* Clear HIDDEN partition flag */
#define PART_FLAG_NO_ROOT_CLR 0x0400 /* Clear NO_ROOT partition flag */
#define PART_FLAG_ROOT_ONLY_CLR 0x0800 /* Clear ROOT_ONLY partition flag */
+#define PART_FLAG_REQ_RESV_CLR 0x1000 /* Clear RES_REQ partition flag */
typedef struct partition_info {
char *allow_alloc_nodes;/* list names of allowed allocating nodes */
@@ -1498,6 +1519,7 @@
typedef struct resource_allocation_response_msg {
uint32_t job_id; /* assigned job id */
char *node_list; /* assigned list of nodes */
+ char *alias_list; /* node name/address/hostnamne aliases */
uint32_t num_cpu_groups;/* elements in below cpu arrays */
uint16_t *cpus_per_node;/* cpus per node */
uint32_t *cpu_count_reps;/* how many nodes have same cpu count */
@@ -1554,26 +1576,20 @@
char *blrtsimage; /* BlrtsImage for this block */
uint16_t conn_type[HIGHEST_DIMENSIONS];
uint32_t cnode_cnt;
+ uint32_t cnode_err_cnt;
int *ionode_inx; /* list index pairs for ionodes in the
* node listed for *ionode_str:
* start_range_1, end_range_1,
* start_range_2, .., -1 */
char *ionode_str;
List job_list; /* List of running jobs on each block */
- uint32_t job_running;
char *linuximage; /* LinuxImage for this block */
char *mloaderimage; /* mloaderImage for this block */
int *mp_inx; /* list index pairs into node_table for *mp_str:
* start_range_1, end_range_1,
* start_range_2, .., -1 */
char *mp_str;
- int *mp_used_inx; /* list index pairs into node_table
- * for used *mp_str:
- * start_range_1, end_range_1,
- * start_range_2, .., -1 */
- char *mp_used_str;
uint16_t node_use;
- char *owner_name;
char *ramdiskimage; /* RamDiskImage for this block */
char *reason;
uint16_t state;
@@ -1646,7 +1662,6 @@
void slurm_init_update_block_msg PARAMS((update_block_msg_t *update_block_msg));
-
/*********************************/
/*
@@ -1663,6 +1678,8 @@
#define RESERVE_FLAG_NO_IGN_JOB 0x0080 /* Clear ignore running jobs */
#define RESERVE_FLAG_LIC_ONLY 0x0100 /* Reserve licenses only, any nodes */
#define RESERVE_FLAG_NO_LIC_ONLY 0x0200 /* Clear reserve licenses only flag */
+#define RESERVE_FLAG_STATIC 0x0400 /* Static node allocation */
+#define RESERVE_FLAG_NO_STATIC 0x0800 /* Clear static node allocation */
#define RESERVE_FLAG_OVERLAP 0x4000 /* Permit to overlap others */
#define RESERVE_FLAG_SPEC_NODES 0x8000 /* Contains specific nodes */
@@ -1697,7 +1714,11 @@
uint16_t flags; /* see RESERVE_FLAG_* above */
char *licenses; /* names of licenses to be reserved */
char *name; /* name of reservation (optional on create) */
- uint32_t node_cnt; /* count of nodes required */
+ uint32_t *node_cnt; /* Count of nodes required. Specify set of job
+ * sizes with trailing zero to optimize layout
+ * for those jobs just specify their total size
+ * to ignore optimized topology. For example,
+ * {512,512,1024,0} OR {2048,0}. */
char *node_list; /* list of reserved nodes or ALL */
char *partition; /* name of partition to be used */
time_t start_time; /* start time of reservation */
@@ -1733,6 +1754,9 @@
#define DEBUG_FLAG_GANG 0x00002000 /* debug gang scheduler */
#define DEBUG_FLAG_RESERVATION 0x00004000 /* advanced reservations */
#define DEBUG_FLAG_FRONT_END 0x00008000 /* front-end nodes */
+#define DEBUG_FLAG_NO_REALTIME 0x00010000 /* get debug when the
+ * realtime server
+ * dies on a bluegene system */
#define GROUP_FORCE 0x8000 /* if set, update group membership
* info even if no updates to
@@ -1750,6 +1774,8 @@
#define PREEMPT_MODE_CANCEL 0x0008 /* always cancel the job */
#define PREEMPT_MODE_GANG 0x8000 /* enable gang scheduling */
+#define RECONFIG_KEEP_PART_INFO 0x0001 /* keep dynamic partition info on scontrol reconfig */
+
typedef struct slurm_ctl_conf {
time_t last_update; /* last update time of the build parameters */
uint16_t accounting_storage_enforce; /* job requires valid association:
@@ -1844,6 +1870,8 @@
uint32_t priority_calc_period; /* seconds between priority decay
* calculation */
uint16_t priority_favor_small; /* favor small jobs over large */
+ uint16_t priority_flags; /* set some flags for priority configuration,
+ * see PRIORITY_FLAGS_* above */
uint32_t priority_max_age; /* time when not to add any more
* priority to a job if reached */
uint16_t priority_reset_period; /* when to clear usage,
@@ -1863,6 +1891,8 @@
* see PROP_PRIO_* */
char *propagate_rlimits;/* Propagate (all/specific) resource limits */
char *propagate_rlimits_except;/* Propagate all rlimits except these */
+ char *reboot_program; /* program to reboot the node */
+ uint16_t reconfig_flags;/* see RECONFIG_* */
char *resume_program; /* program to make nodes full power */
uint16_t resume_rate; /* nodes to make full power, per minute */
uint16_t resume_timeout;/* time required in order to perform a node
@@ -1962,10 +1992,14 @@
uint32_t error_code; /* error code for warning message */
} submit_response_msg_t;
+/* NOTE: If setting node_addr and/or node_hostname then comma separate names
+ * and include an equal number of node_names */
typedef struct slurm_update_node_msg {
char *features; /* new feature for node */
char *gres; /* new generic resources for node */
- char *node_names; /* comma separated list of required nodes */
+ char *node_addr; /* communication name (optional) */
+ char *node_hostname; /* node's hostname (optional) */
+ char *node_names; /* nodelist expression */
uint16_t node_state; /* see enum node_states */
char *reason; /* reason for node being DOWN or DRAINING */
uint32_t reason_uid; /* user ID of sending (needed if user
@@ -1994,12 +2028,57 @@
/* Opaque data type for slurm_step_ctx_* functions */
typedef struct slurm_step_ctx_struct slurm_step_ctx_t;
+#define STAT_COMMAND_RESET 0x0000
+#define STAT_COMMAND_GET 0x0001
+typedef struct stats_info_request_msg {
+ uint16_t command_id;
+} stats_info_request_msg_t;
+
+typedef struct stats_info_response_msg {
+ uint32_t parts_packed;
+ time_t req_time;
+ time_t req_time_start;
+ uint32_t server_thread_count;
+ uint32_t agent_queue_size;
+
+ uint32_t schedule_cycle_max;
+ uint32_t schedule_cycle_last;
+ uint32_t schedule_cycle_sum;
+ uint32_t schedule_cycle_counter;
+ uint32_t schedule_cycle_depth;
+ uint32_t schedule_queue_len;
+
+ uint32_t jobs_submitted;
+ uint32_t jobs_started;
+ uint32_t jobs_completed;
+ uint32_t jobs_canceled;
+ uint32_t jobs_failed;
+
+ uint32_t bf_backfilled_jobs;
+ uint32_t bf_last_backfilled_jobs;
+ uint32_t bf_cycle_counter;
+ uint32_t bf_cycle_sum;
+ uint32_t bf_cycle_last;
+ uint32_t bf_cycle_max;
+ uint32_t bf_last_depth;
+ uint32_t bf_last_depth_try;
+ uint32_t bf_depth_sum;
+ uint32_t bf_depth_try_sum;
+ uint32_t bf_queue_len;
+ uint32_t bf_queue_len_sum;
+ time_t bf_when_last_cycle;
+ uint32_t bf_active;
+} stats_info_response_msg_t;
+
+#define TRIGGER_FLAG_PERM 0x0001
+
#define TRIGGER_RES_TYPE_JOB 0x0001
#define TRIGGER_RES_TYPE_NODE 0x0002
#define TRIGGER_RES_TYPE_SLURMCTLD 0x0003
#define TRIGGER_RES_TYPE_SLURMDBD 0x0004
#define TRIGGER_RES_TYPE_DATABASE 0x0005
#define TRIGGER_RES_TYPE_FRONT_END 0x0006
+
#define TRIGGER_TYPE_UP 0x00000001
#define TRIGGER_TYPE_DOWN 0x00000002
#define TRIGGER_TYPE_FAIL 0x00000004
@@ -2023,6 +2102,7 @@
typedef struct trigger_info {
+ uint16_t flags; /* TRIGGER_FLAG_* */
uint32_t trig_id; /* trigger ID */
uint16_t res_type; /* TRIGGER_RES_TYPE_* */
char * res_id; /* resource ID */
@@ -2291,7 +2371,7 @@
* in the structure that you will pass to slurm_step_ctx_create().
* This function will NOT allocate any new memory.
* IN ptr - pointer to a structure allocated by the user. The structure will
- * be intialized.
+ * be initialized.
*/
extern void slurm_step_ctx_params_t_init PARAMS((slurm_step_ctx_params_t *ptr));
@@ -2360,7 +2440,7 @@
* slurm_step_launch_params_t structure with default values.
* default values. This function will NOT allocate any new memory.
* IN ptr - pointer to a structure allocated by the use. The structure will
- * be intialized.
+ * be initialized.
*/
extern void slurm_step_launch_params_t_init
PARAMS((slurm_step_launch_params_t *ptr));
@@ -2405,6 +2485,14 @@
*/
extern void slurm_step_launch_fwd_wake PARAMS((slurm_step_ctx_t *ctx));
+/*
+ * Specify the plugin name to be used. This may be needed to specify the
+ * non-default MPI plugin when using SLURM API to launch tasks.
+ * IN plugin name - "none", "pmi2", etc.
+ * RET SLURM_SUCCESS or SLURM_ERROR (with errno set)
+ */
+extern int slurm_mpi_plugin_init PARAMS((char *plugin_name));
+
/*****************************************************************************\
* SLURM CONTROL CONFIGURATION READ/PRINT/UPDATE FUNCTIONS
\*****************************************************************************/
@@ -2500,10 +2588,12 @@
extern void slurm_init_update_step_msg PARAMS(
(step_update_request_msg_t * step_msg));
-/* Update the time limit of a job step,
- * IN step_msg - step update messasge descriptor
- * RET 0 or -1 on error */
-extern int slurm_update_step PARAMS((step_update_request_msg_t * step_msg));
+/* Get scheduling statistics */
+extern int slurm_get_statistics PARAMS((stats_info_response_msg_t **buf,
+ stats_info_request_msg_t *req));
+
+/* Reset scheduling statistics */
+extern int slurm_reset_statistics PARAMS((stats_info_request_msg_t *req));
/*****************************************************************************\
* SLURM JOB RESOURCES READ/PRINT FUNCTIONS
@@ -2754,6 +2844,11 @@
extern void slurm_job_step_stat_free(job_step_stat_t *object);
extern void slurm_job_step_stat_response_msg_free(void *object);
+/* Update the time limit of a job step,
+ * IN step_msg - step update messasge descriptor
+ * RET 0 or -1 on error */
+extern int slurm_update_step PARAMS((step_update_request_msg_t * step_msg));
+
/*****************************************************************************\
* SLURM NODE CONFIGURATION READ/PRINT/UPDATE FUNCTIONS
\*****************************************************************************/
diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h
index e26b5a9..6cc5042 100644
--- a/slurm/slurm_errno.h
+++ b/slurm/slurm_errno.h
@@ -189,6 +189,7 @@
ESLURM_PARTITION_IN_USE,
ESLURM_STEP_LIMIT,
ESLURM_JOB_SUSPENDED,
+ ESLURM_CAN_NOT_START_IMMEDIATELY,
/* switch specific error codes, specific values defined in plugin module */
ESLURM_SWITCH_MIN = 3000,
diff --git a/slurm/slurmdb.h b/slurm/slurmdb.h
index 608a830..7643072 100644
--- a/slurm/slurmdb.h
+++ b/slurm/slurmdb.h
@@ -142,6 +142,7 @@
#define QOS_FLAG_PART_TIME_LIMIT 0x00000004
#define QOS_FLAG_ENFORCE_USAGE_THRES 0x00000008
#define QOS_FLAG_NO_RESERVE 0x00000010
+#define QOS_FLAG_REQ_RESV 0x00000020
/* Archive / Purge time flags */
#define SLURMDB_PURGE_BASE 0x0000ffff /* Apply to get the number
@@ -206,6 +207,7 @@
List grp_cpu_run_mins_list; /* list of char * */
List grp_cpus_list; /* list of char * */
List grp_jobs_list; /* list of char * */
+ List grp_mem_list; /* list of char * */
List grp_nodes_list; /* list of char * */
List grp_submit_jobs_list; /* list of char * */
List grp_wall_list; /* list of char * */
@@ -253,6 +255,7 @@
uint16_t duplicates; /* report duplicate job entries */
int32_t exitcode; /* exit code of job */
List groupid_list; /* list of char * */
+ List jobname_list; /* list of char * */
uint32_t nodes_max; /* number of nodes high range */
uint32_t nodes_min; /* number of nodes low range */
List partition_list; /* list of char * */
@@ -378,6 +381,9 @@
uint32_t grp_jobs; /* max number of jobs the
* underlying group of associations can run
* at one time */
+ uint32_t grp_mem; /* max amount of memory the
+ * underlying group of
+ * associations can allocate at once */
uint32_t grp_nodes; /* max number of nodes the
* underlying group of
* associations can allocate at once */
@@ -582,6 +588,8 @@
can allocate at one time */
uint32_t grp_jobs; /* max number of jobs this qos can run
* at one time */
+ uint32_t grp_mem; /* max amount of memory this qos
+ can allocate at one time */
uint32_t grp_nodes; /* max number of nodes this qos
can allocate at once */
uint32_t grp_submit_jobs; /* max number of jobs this qos can submit at
@@ -592,7 +600,7 @@
* use with this qos */
uint64_t max_cpu_run_mins_pu; /* max number of cpu mins a user can
* allocate at a given time when
- * using this qos */
+ * using this qos (Not yet valid option) */
uint32_t max_cpus_pj; /* max number of cpus a job can
* allocate with this qos */
uint32_t max_cpus_pu; /* max number of cpus a user can
diff --git a/slurm/spank.h b/slurm/spank.h
index 8881ffc..5ddf73c 100644
--- a/slurm/spank.h
+++ b/slurm/spank.h
@@ -56,36 +56,45 @@
*/
typedef int (spank_f) (spank_t spank, int ac, char *argv[]);
-/* SPANK plugin operations. SPANK plugin should have at least one of
+/* SPANK plugin operations. SPANK plugin should have at least one of
* these functions defined non-NULL.
*
* Plug-in callbacks are completed at the following points in slurmd:
*
- * slurmd -> slurmstepd
- * `-> init ()
- * -> process spank options
- * -> init_post_opt ()
- * + drop privileges (initgroups(), seteuid(), chdir())
- * `-> user_init ()
- * + for each task
- * | + fork ()
- * | |
- * | + reclaim privileges
- * | `-> task_init_privileged ()
- * | |
- * | + become_user ()
- * | `-> task_init ()
- * | |
- * | + execve ()
- * |
- * + reclaim privileges
- * + for each task
- * | `-> task_post_fork ()
- * |
- * + for each task
- * | + wait ()
- * | `-> task_exit ()
- * `-> exit ()
+ * slurmd
+ * `-> slurmd_init()
+ * |
+ * `-> job_prolog()
+ * |
+ * | `-> slurmstepd
+ * | `-> init ()
+ * | -> process spank options
+ * | -> init_post_opt ()
+ * | + drop privileges (initgroups(), seteuid(), chdir())
+ * | `-> user_init ()
+ * | + for each task
+ * | | + fork ()
+ * | | |
+ * | | + reclaim privileges
+ * | | `-> task_init_privileged ()
+ * | | |
+ * | | + become_user ()
+ * | | `-> task_init ()
+ * | | |
+ * | | + execve ()
+ * | |
+ * | + reclaim privileges
+ * | + for each task
+ * | | `-> task_post_fork ()
+ * | |
+ * | + for each task
+ * | | + wait ()
+ * | | `-> task_exit ()
+ * | `-> exit ()
+ * |
+ * `---> job_epilog()
+ * |
+ * `-> slurmd_exit()
*
* In srun only the init(), init_post_opt() and local_user_init(), and exit()
* callbacks are used.
@@ -93,9 +102,14 @@
* In sbatch/salloc only the init(), init_post_opt(), and exit() callbacks
* are used.
*
+ * In slurmd proper, only the slurmd_init(), slurmd_exit(), and
+ * job_prolog/epilog callbacks are used.
+ *
*/
extern spank_f slurm_spank_init;
+extern spank_f slurm_spank_slurmd_init;
+extern spank_f slurm_spank_job_prolog;
extern spank_f slurm_spank_init_post_opt;
extern spank_f slurm_spank_local_user_init;
extern spank_f slurm_spank_user_init;
@@ -103,6 +117,8 @@
extern spank_f slurm_spank_task_init;
extern spank_f slurm_spank_task_post_fork;
extern spank_f slurm_spank_task_exit;
+extern spank_f slurm_spank_job_epilog;
+extern spank_f slurm_spank_slurmd_exit;
extern spank_f slurm_spank_exit;
@@ -183,10 +199,15 @@
enum spank_context {
S_CTX_ERROR, /* Error obtaining current context */
S_CTX_LOCAL, /* Local context (srun) */
- S_CTX_REMOTE, /* Remote context (slurmd) */
- S_CTX_ALLOCATOR /* Allocator context (sbatch/salloc) */
+ S_CTX_REMOTE, /* Remote context (slurmstepd) */
+ S_CTX_ALLOCATOR, /* Allocator context (sbatch/salloc) */
+ S_CTX_SLURMD, /* slurmd context */
+ S_CTX_JOB_SCRIPT /* prolog/epilog context */
};
+#define HAVE_S_CTX_SLURMD 1 /* slurmd context supported */
+#define HAVE_S_CTX_JOB_SCRIPT 1 /* job script (prolog/epilog) supported */
+
typedef enum spank_context spank_context_t;
/*
@@ -198,7 +219,7 @@
* the plugin to distinguish between plugin-local options, `optarg'
* is an argument passed by the user (if applicable), and `remote'
* specifies whether this call is being made locally (e.g. in srun)
- * or remotely (e.g. in slurmd).
+ * or remotely (e.g. in slurmstepd/slurmd).
*/
typedef int (*spank_opt_cb_f) (int val, const char *optarg, int remote);
@@ -254,7 +275,7 @@
* Determine whether plugin is loaded in "remote" context
*
* Returns:
- * = 1 remote context, i.e. plugin is loaded in slurmd.
+ * = 1 remote context, i.e. plugin is loaded in /slurmstepd.
* = 0 not remote context
* < 0 spank handle was not valid.
*/
@@ -284,6 +305,22 @@
*/
spank_err_t spank_option_register (spank_t spank, struct spank_option *opt);
+/*
+ * Check whether spank plugin option [opt] has been activated.
+ * If the option takes an argument, then the option argument
+ * (if found) will be returned in *optarg.
+ *
+ * Returns
+ * ESPANK_SUCCESS if the option was used by user. In this case
+ * *optarg will contain the option argument if opt->has_arg != 0.
+ * ESPANK_ERROR if the option wasn't used.
+ * ESPANK_BAD_ARG if an invalid argument was passed to the function,
+ * such as NULL opt, NULL opt->name, or NULL optarg when opt->has_arg != 0.
+ * ESPANK_NOT_AVAIL if called from improper context.
+ */
+spank_err_t spank_option_getopt (spank_t spank, struct spank_option *opt,
+ char **optarg);
+
/* Get the value for the current job or task item specified,
* storing the result in the subsequent pointer argument(s).
@@ -296,7 +333,7 @@
* item is requested from outside a task context, ESPANK_BAD_ARG
* if invalid args are passed to spank_get_item or spank_get_item
* is called from an invalid context, and ESPANK_NOT_REMOTE
- * if not called from slurmd context or spank_user_local_init.
+ * if not called from slurmstepd context or spank_user_local_init.
*/
spank_err_t spank_get_item (spank_t spank, spank_item_t item, ...);
@@ -319,7 +356,7 @@
* Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
* ESPANK_ENV_EXISTS = var exists in job env and overwrite == 0.
* ESPANK_BAD_ARG = spank handle invalid or var/val are NULL.
- * ESPANK_NOT_REMOTE = not called from slurmd.
+ * ESPANK_NOT_REMOTE = not called from slurmstepd.
*/
spank_err_t spank_setenv (spank_t spank, const char *var, const char *val,
int overwrite);
@@ -330,7 +367,7 @@
*
* Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
* ESPANK_BAD_ARG = spank handle invalid or var is NULL.
- * ESPANK_NOT_REMOTE = not called from slurmd.
+ * ESPANK_NOT_REMOTE = not called from slurmstepd.
*/
spank_err_t spank_unsetenv (spank_t spank, const char *var);
diff --git a/src/Makefile.am b/src/Makefile.am
index f9c8dcc..76a5713 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,7 +2,7 @@
slurmctld slurmd slurmdbd plugins sbcast \
scontrol scancel squeue sinfo smap sview salloc \
sbatch sattach strigger sacct sacctmgr sreport sstat \
- sshare sprio
+ sshare sprio sdiag
if !BUILD_SRUN2APRUN
if !REAL_BG_L_P_LOADED
diff --git a/src/Makefile.in b/src/Makefile.in
index f4040b7..ce68b80 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -102,7 +102,7 @@
DIST_SUBDIRS = common api db_api database slurmctld slurmd slurmdbd \
plugins sbcast scontrol scancel squeue sinfo smap sview salloc \
sbatch sattach strigger sacct sacctmgr sreport sstat sshare \
- sprio srun srun_cr
+ sprio sdiag srun srun_cr
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
@@ -206,6 +206,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -242,6 +243,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -332,7 +334,7 @@
SUBDIRS = common api db_api database slurmctld slurmd slurmdbd plugins \
sbcast scontrol scancel squeue sinfo smap sview salloc sbatch \
sattach strigger sacct sacctmgr sreport sstat sshare sprio \
- $(am__append_1) $(am__append_2)
+ sdiag $(am__append_1) $(am__append_2)
all: all-recursive
.SUFFIXES:
@@ -581,10 +583,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/api/Makefile.am b/src/api/Makefile.am
index b03bf4e..20f606c 100644
--- a/src/api/Makefile.am
+++ b/src/api/Makefile.am
@@ -88,12 +88,13 @@
config_info.c \
front_end_info.c \
init_msg.c \
- job_info.c job_info.h \
+ job_info.c \
job_step_info.c \
node_info.c \
partition_info.c \
reservation_info.c \
signal.c \
+ slurm_get_statistics.c \
slurm_hostlist.c \
slurm_pmi.c slurm_pmi.h \
step_ctx.c step_ctx.h \
@@ -107,6 +108,7 @@
reconfigure.c \
update_config.c
+
common_dir = $(top_builddir)/src/common
slurmapi_add = \
@@ -129,7 +131,7 @@
libslurm_la_LDFLAGS = \
$(LIB_LDFLAGS) \
-version-info $(current):$(rev):$(age) \
- $(OTHER_FLAGS)
+ $(OTHER_FLAGS) $(HWLOC_LDFLAGS) $(HWLOC_LIBS)
#
# The libpmi_la_LIBADD specification below causes libpmi.la to relink
diff --git a/src/api/Makefile.in b/src/api/Makefile.in
index e844208..0f4a1a8 100644
--- a/src/api/Makefile.in
+++ b/src/api/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -108,6 +108,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(libdir)"
LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
libpmi_la_DEPENDENCIES = $(top_builddir)/src/api/libslurm.la
@@ -129,9 +135,10 @@
checkpoint.lo complete.lo config_info.lo front_end_info.lo \
init_msg.lo job_info.lo job_step_info.lo node_info.lo \
partition_info.lo reservation_info.lo signal.lo \
- slurm_hostlist.lo slurm_pmi.lo step_ctx.lo step_io.lo \
- step_launch.lo pmi_server.lo submit.lo suspend.lo topo_info.lo \
- triggers.lo reconfigure.lo update_config.lo
+ slurm_get_statistics.lo slurm_hostlist.lo slurm_pmi.lo \
+ step_ctx.lo step_io.lo step_launch.lo pmi_server.lo submit.lo \
+ suspend.lo topo_info.lo triggers.lo reconfigure.lo \
+ update_config.lo
am_libslurmhelper_la_OBJECTS = $(am__objects_1)
libslurmhelper_la_OBJECTS = $(am_libslurmhelper_la_OBJECTS)
PROGRAMS = $(noinst_PROGRAMS)
@@ -238,6 +245,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -274,6 +282,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -443,12 +452,13 @@
config_info.c \
front_end_info.c \
init_msg.c \
- job_info.c job_info.h \
+ job_info.c \
job_step_info.c \
node_info.c \
partition_info.c \
reservation_info.c \
signal.c \
+ slurm_get_statistics.c \
slurm_hostlist.c \
slurm_pmi.c slurm_pmi.h \
step_ctx.c step_ctx.h \
@@ -483,7 +493,7 @@
libslurm_la_LDFLAGS = \
$(LIB_LDFLAGS) \
-version-info $(current):$(rev):$(age) \
- $(OTHER_FLAGS)
+ $(OTHER_FLAGS) $(HWLOC_LDFLAGS) $(HWLOC_LIBS)
#
@@ -586,11 +596,11 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libpmi.la: $(libpmi_la_OBJECTS) $(libpmi_la_DEPENDENCIES)
+libpmi.la: $(libpmi_la_OBJECTS) $(libpmi_la_DEPENDENCIES) $(EXTRA_libpmi_la_DEPENDENCIES)
$(libpmi_la_LINK) -rpath $(libdir) $(libpmi_la_OBJECTS) $(libpmi_la_LIBADD) $(LIBS)
-libslurm.la: $(libslurm_la_OBJECTS) $(libslurm_la_DEPENDENCIES)
+libslurm.la: $(libslurm_la_OBJECTS) $(libslurm_la_DEPENDENCIES) $(EXTRA_libslurm_la_DEPENDENCIES)
$(libslurm_la_LINK) -rpath $(libdir) $(libslurm_la_OBJECTS) $(libslurm_la_LIBADD) $(LIBS)
-libslurmhelper.la: $(libslurmhelper_la_OBJECTS) $(libslurmhelper_la_DEPENDENCIES)
+libslurmhelper.la: $(libslurmhelper_la_OBJECTS) $(libslurmhelper_la_DEPENDENCIES) $(EXTRA_libslurmhelper_la_DEPENDENCIES)
$(LINK) $(libslurmhelper_la_OBJECTS) $(libslurmhelper_la_LIBADD) $(LIBS)
clean-noinstPROGRAMS:
@@ -626,6 +636,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reconfigure.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reservation_info.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/signal.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_get_statistics.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_hostlist.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_pmi.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/step_ctx.Plo@am__quote@
@@ -765,10 +776,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/api/allocate.c b/src/api/allocate.c
index 022c25b..4d3164d 100644
--- a/src/api/allocate.c
+++ b/src/api/allocate.c
@@ -211,6 +211,7 @@
if (!req->immediate) {
listen = _create_allocation_response_socket(hostname);
if (listen == NULL) {
+ xfree(req->alloc_node);
xfree(req);
return NULL;
}
@@ -228,6 +229,7 @@
destroy_forward(&resp_msg.forward);
if (!req->immediate)
_destroy_allocation_response_socket(listen);
+ xfree(req->alloc_node);
xfree(req);
errno = errnum;
return NULL;
@@ -277,6 +279,7 @@
destroy_forward(&resp_msg.forward);
if (!req->immediate)
_destroy_allocation_response_socket(listen);
+ xfree(req->alloc_node);
xfree(req);
errno = errnum;
return resp;
diff --git a/src/api/allocate_msg.c b/src/api/allocate_msg.c
index a4342b3..f01b3bb 100644
--- a/src/api/allocate_msg.c
+++ b/src/api/allocate_msg.c
@@ -122,7 +122,7 @@
}
if (net_stream_listen(&sock, (short *)port) < 0) {
- error("unable to intialize step launch listening socket: %m");
+ error("unable to initialize step launch listening socket: %m");
xfree(msg_thr);
return NULL;
}
diff --git a/src/api/block_info.c b/src/api/block_info.c
index 06aa3f8..ccbc627 100644
--- a/src/api/block_info.c
+++ b/src/api/block_info.c
@@ -115,7 +115,7 @@
block_info_t * block_ptr, int one_liner)
{
int j;
- char tmp1[16], *tmp_char = NULL;
+ char tmp1[16], tmp2[16], *tmp_char = NULL;
char *out = NULL;
char *line_end = "\n ";
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
@@ -126,20 +126,34 @@
/****** Line 1 ******/
convert_num_unit((float)block_ptr->cnode_cnt, tmp1, sizeof(tmp1),
UNIT_NONE);
+ if (cluster_flags & CLUSTER_FLAG_BGQ) {
+ convert_num_unit((float)block_ptr->cnode_err_cnt, tmp2,
+ sizeof(tmp2), UNIT_NONE);
+ tmp_char = xstrdup_printf("%s/%s", tmp1, tmp2);
+ } else
+ tmp_char = tmp1;
out = xstrdup_printf("BlockName=%s TotalNodes=%s State=%s%s",
- block_ptr->bg_block_id, tmp1,
+ block_ptr->bg_block_id, tmp_char,
bg_block_state_string(block_ptr->state),
line_end);
-
+ if (cluster_flags & CLUSTER_FLAG_BGQ)
+ xfree(tmp_char);
/****** Line 2 ******/
- if (block_ptr->job_running > NO_JOB_RUNNING)
- xstrfmtcat(out, "JobRunning=%u ", block_ptr->job_running);
- else
+ j = 0;
+ if (block_ptr->job_list)
+ j = list_count(block_ptr->job_list);
+
+ if (!j)
xstrcat(out, "JobRunning=NONE ");
+ else if (j == 1) {
+ block_job_info_t *block_job = list_peek(block_ptr->job_list);
+ xstrfmtcat(out, "JobRunning=%u ", block_job->job_id);
+ } else
+ xstrcat(out, "JobRunning=Multiple ");
+
tmp_char = conn_type_string_full(block_ptr->conn_type);
- xstrfmtcat(out, "User=%s ConnType=%s",
- block_ptr->owner_name, tmp_char);
+ xstrfmtcat(out, "ConnType=%s", tmp_char);
xfree(tmp_char);
if(cluster_flags & CLUSTER_FLAG_BGL)
xstrfmtcat(out, " NodeUse=%s",
diff --git a/src/api/cancel.c b/src/api/cancel.c
index 1f6f366..5fc6309 100644
--- a/src/api/cancel.c
+++ b/src/api/cancel.c
@@ -1,6 +1,5 @@
/*****************************************************************************\
* cancel.c - cancel a slurm job or job step
- * $Id$
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -57,7 +56,7 @@
* IN batch_flag - 1 to signal batch shell only, otherwise 0
* RET 0 on success, otherwise return -1 and set errno to indicate the error
*/
-int
+extern int
slurm_kill_job (uint32_t job_id, uint16_t signal, uint16_t batch_flag)
{
int rc;
@@ -92,7 +91,7 @@
* IN signal - signal number
* RET 0 on success, otherwise return -1 and set errno to indicate the error
*/
-int
+extern int
slurm_kill_job_step (uint32_t job_id, uint32_t step_id, uint16_t signal)
{
int rc;
diff --git a/src/api/config_info.c b/src/api/config_info.c
index c0b9359..6ab4bc8 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -641,6 +641,13 @@
key_pair->value = xstrdup(tmp_str);
list_append(ret_list, key_pair);
+ snprintf(tmp_str, sizeof(tmp_str), "%u",
+ slurm_ctl_conf_ptr->priority_flags);
+ key_pair = xmalloc(sizeof(config_key_pair_t));
+ key_pair->name = xstrdup("PriorityFlags");
+ key_pair->value = xstrdup(tmp_str);
+ list_append(ret_list, key_pair);
+
secs2time_str((time_t) slurm_ctl_conf_ptr->priority_max_age,
tmp_str, sizeof(tmp_str));
key_pair = xmalloc(sizeof(config_key_pair_t));
@@ -738,6 +745,16 @@
list_append(ret_list, key_pair);
key_pair = xmalloc(sizeof(config_key_pair_t));
+ key_pair->name = xstrdup("RebootProgram");
+ key_pair->value = xstrdup(slurm_ctl_conf_ptr->reboot_program);
+ list_append(ret_list, key_pair);
+
+ key_pair = xmalloc(sizeof(config_key_pair_t));
+ key_pair->name = xstrdup("ReconfigFlags");
+ key_pair->value = reconfig_flags2str(slurm_ctl_conf_ptr->reconfig_flags);
+ list_append(ret_list, key_pair);
+
+ key_pair = xmalloc(sizeof(config_key_pair_t));
key_pair->name = xstrdup("ResumeProgram");
key_pair->value = xstrdup(slurm_ctl_conf_ptr->resume_program);
list_append(ret_list, key_pair);
@@ -831,8 +848,8 @@
key_pair->value = xstrdup(tmp_str);
list_append(ret_list, key_pair);
- snprintf(tmp_str, sizeof(tmp_str), "%u",
- slurm_ctl_conf_ptr->slurmctld_debug);
+ snprintf(tmp_str, sizeof(tmp_str), "%s",
+ log_num2string(slurm_ctl_conf_ptr->slurmctld_debug));
key_pair = xmalloc(sizeof(config_key_pair_t));
key_pair->name = xstrdup("SlurmctldDebug");
key_pair->value = xstrdup(tmp_str);
@@ -869,8 +886,8 @@
key_pair->value = xstrdup(tmp_str);
list_append(ret_list, key_pair);
- snprintf(tmp_str, sizeof(tmp_str), "%u",
- slurm_ctl_conf_ptr->slurmd_debug);
+ snprintf(tmp_str, sizeof(tmp_str), "%s",
+ log_num2string(slurm_ctl_conf_ptr->slurmd_debug));
key_pair = xmalloc(sizeof(config_key_pair_t));
key_pair->name = xstrdup("SlurmdDebug");
key_pair->value = xstrdup(tmp_str);
diff --git a/src/api/init_msg.c b/src/api/init_msg.c
index 8071179..703355c 100644
--- a/src/api/init_msg.c
+++ b/src/api/init_msg.c
@@ -145,7 +145,6 @@
resv_msg->duration = NO_VAL;
resv_msg->end_time = (time_t) NO_VAL;
resv_msg->flags = (uint16_t) NO_VAL;
- resv_msg->node_cnt = NO_VAL;
resv_msg->start_time = (time_t) NO_VAL;
}
@@ -179,7 +178,6 @@
{
memset(update_block_msg, 0, sizeof(update_block_msg_t));
update_block_msg->conn_type[0] = (uint16_t)NO_VAL;
- update_block_msg->job_running = NO_VAL;
update_block_msg->cnode_cnt = NO_VAL;
update_block_msg->node_use = (uint16_t)NO_VAL;
update_block_msg->state = (uint16_t)NO_VAL;
diff --git a/src/api/job_info.c b/src/api/job_info.c
index b872d37..6bf5761 100644
--- a/src/api/job_info.c
+++ b/src/api/job_info.c
@@ -161,7 +161,7 @@
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
if (cluster_flags & CLUSTER_FLAG_BG) {
- nodelist = "BP_List";
+ nodelist = "MidplaneList";
select_g_select_jobinfo_get(job_ptr->select_jobinfo,
SELECT_JOBDATA_IONODES,
&ionodes);
diff --git a/src/api/job_step_info.c b/src/api/job_step_info.c
index d644d35..b7483a6 100644
--- a/src/api/job_step_info.c
+++ b/src/api/job_step_info.c
@@ -165,7 +165,7 @@
SELECT_JOBDATA_IONODES,
&io_nodes);
snprintf(tmp_line, sizeof(tmp_line),
- "Partition=%s BP_List=%s[%s] Gres=%s",
+ "Partition=%s MidplaneList=%s[%s] Gres=%s",
job_step_ptr->partition,
job_step_ptr->nodes, io_nodes,
job_step_ptr->gres);
@@ -316,9 +316,9 @@
xassert(resp);
- if(!node_list) {
- if(!(step_layout =
- slurm_job_step_layout_get(job_id, step_id))) {
+ if (!node_list) {
+ if (!(step_layout =
+ slurm_job_step_layout_get(job_id, step_id))) {
rc = errno;
error("slurm_job_step_stat: "
"problem getting step_layout for %u.%u: %s",
@@ -328,7 +328,7 @@
node_list = step_layout->node_list;
}
- if(!*resp) {
+ if (!*resp) {
resp_out = xmalloc(sizeof(job_step_stat_response_msg_t));
*resp = resp_out;
created = 1;
@@ -342,16 +342,16 @@
slurm_msg_t_init(&req_msg);
memset(&req, 0, sizeof(job_step_id_msg_t));
- resp_out->job_id = req.job_id = job_id;
+ resp_out->job_id = req.job_id = job_id;
resp_out->step_id = req.step_id = step_id;
req_msg.msg_type = REQUEST_JOB_STEP_STAT;
req_msg.data = &req;
- if(!(ret_list = slurm_send_recv_msgs(node_list, &req_msg, 0, false))) {
+ if (!(ret_list = slurm_send_recv_msgs(node_list, &req_msg, 0, false))) {
error("slurm_job_step_stat: got an error no list returned");
rc = SLURM_ERROR;
- if(created) {
+ if (created) {
slurm_job_step_stat_response_msg_free(resp_out);
*resp = NULL;
}
@@ -359,10 +359,10 @@
}
itr = list_iterator_create(ret_list);
- while((ret_data_info = list_next(itr))) {
+ while ((ret_data_info = list_next(itr))) {
switch (ret_data_info->type) {
case RESPONSE_JOB_STEP_STAT:
- if(!resp_out->stats_list)
+ if (!resp_out->stats_list)
resp_out->stats_list = list_create(
slurm_free_job_step_stat);
list_push(resp_out->stats_list,
@@ -390,7 +390,7 @@
list_iterator_destroy(itr);
list_destroy(ret_list);
- if(resp_out->stats_list)
+ if (resp_out->stats_list)
list_sort(resp_out->stats_list, (ListCmpF)_sort_stats_by_name);
cleanup:
slurm_step_layout_destroy(step_layout);
diff --git a/src/api/node_info.c b/src/api/node_info.c
index 4f8f0c8..bf25e8e 100644
--- a/src/api/node_info.c
+++ b/src/api/node_info.c
@@ -123,9 +123,9 @@
int node_scaling, int one_liner )
{
uint16_t my_state = node_ptr->node_state;
- char *comp_str = "", *drain_str = "", *power_str = "";
+ char *cloud_str = "", *comp_str = "", *drain_str = "", *power_str = "";
char tmp_line[512], time_str[32];
- char *out = NULL;
+ char *out = NULL, *reason_str = NULL, *select_reason_str = NULL;
uint16_t err_cpus = 0, alloc_cpus = 0;
int cpus_per_node = 1;
int total_used = node_ptr->cpus;
@@ -134,6 +134,10 @@
if (node_scaling)
cpus_per_node = node_ptr->cpus / node_scaling;
+ if (my_state & NODE_STATE_CLOUD) {
+ my_state &= (~NODE_STATE_CLOUD);
+ cloud_str = "+CLOUD";
+ }
if (my_state & NODE_STATE_COMPLETING) {
my_state &= (~NODE_STATE_COMPLETING);
comp_str = "+COMPLETING";
@@ -177,6 +181,16 @@
/****** Line 1 ******/
snprintf(tmp_line, sizeof(tmp_line), "NodeName=%s ", node_ptr->name);
xstrcat(out, tmp_line);
+ if (cluster_flags & CLUSTER_FLAG_BG) {
+ slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
+ SELECT_NODEDATA_RACK_MP,
+ 0, &select_reason_str);
+ if (select_reason_str) {
+ xstrfmtcat(out, "RackMidplane=%s ", select_reason_str);
+ xfree(select_reason_str);
+ }
+ }
+
if (node_ptr->arch) {
snprintf(tmp_line, sizeof(tmp_line), "Arch=%s ",
node_ptr->arch);
@@ -236,8 +250,9 @@
/****** Line 6 ******/
snprintf(tmp_line, sizeof(tmp_line),
- "State=%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u",
- node_state_string(my_state), comp_str, drain_str, power_str,
+ "State=%s%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u",
+ node_state_string(my_state),
+ cloud_str, comp_str, drain_str, power_str,
node_ptr->threads, node_ptr->tmp_disk, node_ptr->weight);
xstrcat(out, tmp_line);
if (one_liner)
@@ -269,18 +284,44 @@
xstrcat(out, "\n ");
/****** Line 8 ******/
- if (node_ptr->reason_time) {
- char *user_name = uid_to_string(node_ptr->reason_uid);
- slurm_make_time_str ((time_t *)&node_ptr->reason_time,
- time_str, sizeof(time_str));
- snprintf(tmp_line, sizeof(tmp_line), "Reason=%s [%s@%s]",
- node_ptr->reason, user_name, time_str);
- xstrcat(out, tmp_line);
- xfree(user_name);
- } else {
- snprintf(tmp_line, sizeof(tmp_line), "Reason=%s",
- node_ptr->reason);
- xstrcat(out, tmp_line);
+ if (node_ptr->reason && node_ptr->reason[0])
+ xstrcat(reason_str, node_ptr->reason);
+ slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
+ SELECT_NODEDATA_EXTRA_INFO,
+ 0, &select_reason_str);
+ if (select_reason_str && select_reason_str[0]) {
+ if (reason_str)
+ xstrcat(reason_str, "\n");
+ xstrcat(reason_str, select_reason_str);
+ }
+ xfree(select_reason_str);
+ if (reason_str) {
+ int inx = 1;
+ char *save_ptr = NULL, *tok, *user_name;
+ tok = strtok_r(reason_str, "\n", &save_ptr);
+ while (tok) {
+ if (inx == 1) {
+ xstrcat(out, "Reason=");
+ } else {
+ if (one_liner)
+ xstrcat(out, " ");
+ else
+ xstrcat(out, "\n ");
+ xstrcat(out, " ");
+ }
+ snprintf(tmp_line, sizeof(tmp_line), "%s", tok);
+ xstrcat(out, tmp_line);
+ if ((inx++ == 1) && node_ptr->reason_time) {
+ user_name = uid_to_string(node_ptr->reason_uid);
+ slurm_make_time_str((time_t *)&node_ptr->reason_time,
+ time_str,sizeof(time_str));
+ snprintf(tmp_line, sizeof(tmp_line),
+ " [%s@%s]", user_name, time_str);
+ xstrcat(out, tmp_line);
+ }
+ tok = strtok_r(NULL, "\n", &save_ptr);
+ }
+ xfree(reason_str);
}
if (one_liner)
xstrcat(out, "\n");
diff --git a/src/api/partition_info.c b/src/api/partition_info.c
index fa19e12..c1a920c 100644
--- a/src/api/partition_info.c
+++ b/src/api/partition_info.c
@@ -256,6 +256,11 @@
else
sprintf(tmp_line, " RootOnly=NO");
xstrcat(out, tmp_line);
+ if (part_ptr->flags & PART_FLAG_REQ_RESV)
+ sprintf(tmp_line, " ReqResv=YES");
+ else
+ sprintf(tmp_line, " ReqResv=NO");
+ xstrcat(out, tmp_line);
force = part_ptr->max_share & SHARED_FORCE;
val = part_ptr->max_share & (~SHARED_FORCE);
diff --git a/src/api/signal.c b/src/api/signal.c
index 4ea0934..4d214fe 100644
--- a/src/api/signal.c
+++ b/src/api/signal.c
@@ -277,8 +277,8 @@
goto fail;
}
for (i = 0; i < step_info->job_step_count; i++) {
- if (step_info->job_steps[i].job_id == job_id
- && step_info->job_steps[i].step_id == step_id) {
+ if ((step_info->job_steps[i].job_id == job_id) &&
+ (step_info->job_steps[i].step_id == step_id)) {
rc = _signal_job_step(&step_info->job_steps[i],
alloc_info, signal);
save_errno = rc;
@@ -373,8 +373,8 @@
goto fail;
}
for (i = 0; i < step_info->job_step_count; i++) {
- if (step_info->job_steps[i].job_id == job_id
- && step_info->job_steps[i].step_id == step_id) {
+ if ((step_info->job_steps[i].job_id == job_id) &&
+ (step_info->job_steps[i].step_id == step_id)) {
rc = _terminate_job_step(&step_info->job_steps[i],
alloc_info);
save_errno = errno;
diff --git a/src/api/slurm_get_statistics.c b/src/api/slurm_get_statistics.c
new file mode 100644
index 0000000..1139155
--- /dev/null
+++ b/src/api/slurm_get_statistics.c
@@ -0,0 +1,115 @@
+/****************************************************************************\
+ * slurm_get_statistics.c - functions for sdiag command
+ *****************************************************************************
+ * Produced at Barcelona Supercomputing Center, December 2011
+ * Written by Alejandro Lucero <alucero@bsc.es>
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+
+#include <slurm/slurm.h>
+#include <slurm/slurm_errno.h>
+
+#include "src/common/read_config.h"
+#include "src/common/slurm_protocol_api.h"
+
+
+extern int slurm_reset_statistics(stats_info_request_msg_t *req)
+{
+ int rc;
+ slurm_msg_t req_msg;
+ slurm_msg_t resp_msg;
+
+ slurm_msg_t_init(&req_msg);
+ slurm_msg_t_init(&resp_msg);
+
+ req_msg.msg_type = REQUEST_STATS_INFO;
+ req_msg.data = req;
+
+ rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);
+
+ if (rc == SLURM_SOCKET_ERROR)
+ return SLURM_ERROR;
+
+ switch (resp_msg.msg_type) {
+ case RESPONSE_STATS_INFO:
+ break;
+ case RESPONSE_SLURM_RC:
+ rc = ((return_code_msg_t *) resp_msg.data)->return_code;
+ if (rc)
+ slurm_seterrno_ret(rc);
+ break;
+ default:
+ slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
+ }
+
+ return SLURM_PROTOCOL_SUCCESS;
+
+}
+
+extern int slurm_get_statistics(stats_info_response_msg_t **buf,
+ stats_info_request_msg_t *req)
+{
+ int rc;
+ slurm_msg_t req_msg;
+ slurm_msg_t resp_msg;
+
+ slurm_msg_t_init(&req_msg);
+ slurm_msg_t_init(&resp_msg);
+
+ req_msg.msg_type = REQUEST_STATS_INFO;
+ req_msg.data = req;
+
+ rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);
+
+ if (rc == SLURM_SOCKET_ERROR)
+ return SLURM_ERROR;
+
+ switch (resp_msg.msg_type) {
+ case RESPONSE_STATS_INFO:
+ *buf = (stats_info_response_msg_t *)resp_msg.data;
+ break;
+ case RESPONSE_SLURM_RC:
+ rc = ((return_code_msg_t *) resp_msg.data)->return_code;
+ if (rc)
+ slurm_seterrno_ret(rc);
+ buf = NULL;
+ break;
+ default:
+ slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
+ }
+
+ return SLURM_PROTOCOL_SUCCESS;
+}
diff --git a/src/api/step_ctx.c b/src/api/step_ctx.c
index 0775543..6b73b70 100644
--- a/src/api/step_ctx.c
+++ b/src/api/step_ctx.c
@@ -156,7 +156,7 @@
*/
if (net_stream_listen(&sock, &port) < 0) {
errnum = errno;
- error("unable to intialize step context socket: %m");
+ error("unable to initialize step context socket: %m");
slurm_free_job_step_create_request_msg(step_req);
goto fail;
}
@@ -216,7 +216,7 @@
*/
if (net_stream_listen(&sock, &port) < 0) {
errnum = errno;
- error("unable to intialize step context socket: %m");
+ error("unable to initialize step context socket: %m");
slurm_free_job_step_create_request_msg(step_req);
goto fail;
}
@@ -464,7 +464,7 @@
* in the structure that you will pass to slurm_step_ctx_create().
* This function will NOT allocate any new memory.
* IN ptr - pointer to a structure allocated by the user. The structure will
- * be intialized.
+ * be initialized.
*/
extern void slurm_step_ctx_params_t_init (slurm_step_ctx_params_t *ptr)
{
diff --git a/src/api/step_io.c b/src/api/step_io.c
index eae4d13..1f844f1 100644
--- a/src/api/step_io.c
+++ b/src/api/step_io.c
@@ -1170,6 +1170,9 @@
int
client_io_handler_finish(client_io_t *cio)
{
+ if (cio == NULL)
+ return SLURM_SUCCESS;
+
eio_signal_shutdown(cio->eio);
if (pthread_join(cio->ioid, NULL) < 0) {
error("Waiting for client io pthread: %m");
@@ -1182,7 +1185,8 @@
void
client_io_handler_destroy(client_io_t *cio)
{
- xassert(cio);
+ if (cio == NULL)
+ return;
/* FIXME - perhaps should make certain that IO engine is shutdown
(by calling client_io_handler_finish()) before freeing anything */
@@ -1208,6 +1212,7 @@
if (cio == NULL)
return;
+
pthread_mutex_lock(&cio->ioservers_lock);
for (i = 0; i < num_node_ids; i++) {
node_id = node_ids[i];
@@ -1239,6 +1244,8 @@
struct server_io_info *info;
int i;
+ if (cio == NULL)
+ return;
pthread_mutex_lock(&cio->ioservers_lock);
for (i = 0; i < cio->num_nodes; i++) {
if (!bit_test(cio->ioservers_ready_bits, i)) {
diff --git a/src/api/step_launch.c b/src/api/step_launch.c
index f8c8d29..5a1d929 100644
--- a/src/api/step_launch.c
+++ b/src/api/step_launch.c
@@ -125,7 +125,7 @@
* slurm_step_launch_params_t structure with default values.
* This function will NOT allocate any new memory.
* IN ptr - pointer to a structure allocated by the user.
- * The structure will be intialized.
+ * The structure will be initialized.
*/
void slurm_step_launch_params_t_init (slurm_step_launch_params_t *ptr)
{
@@ -141,6 +141,17 @@
}
/*
+ * Specify the plugin name to be used. This may be needed to specify the
+ * non-default MPI plugin when using SLURM API to launch tasks.
+ * IN plugin name - "none", "pmi2", etc.
+ * RET SLURM_SUCCESS or SLURM_ERROR (with errno set)
+ */
+extern int slurm_mpi_plugin_init(char *plugin_name)
+{
+ return mpi_hook_client_init(plugin_name);
+}
+
+/*
* slurm_step_launch - launch a parallel job step
* IN ctx - job step context generated by slurm_step_ctx_create
* IN callbacks - Identify functions to be called when various events occur
@@ -230,6 +241,7 @@
} else {
launch.cwd = _lookup_cwd();
}
+ launch.alias_list = params->alias_list;
launch.nnodes = ctx->step_resp->step_layout->node_cnt;
launch.ntasks = ctx->step_resp->step_layout->task_cnt;
launch.slurmd_debug = params->slurmd_debug;
@@ -489,11 +501,13 @@
eio_signal_shutdown(sls->msg_handle);
pthread_mutex_unlock(&sls->lock);
- pthread_join(sls->msg_thread, NULL);
+ if (sls->msg_thread)
+ pthread_join(sls->msg_thread, NULL);
pthread_mutex_lock(&sls->lock);
pmi_kvs_free();
- eio_handle_destroy(sls->msg_handle);
+ if (sls->msg_handle)
+ eio_handle_destroy(sls->msg_handle);
/* Shutdown the io timeout thread, if one exists */
if (sls->io_timeout_thread_created) {
@@ -820,7 +834,7 @@
for (i = 0; i < sls->num_resp_port; i++) {
if (net_stream_listen(&sock, &port) < 0) {
- error("unable to intialize step launch listening "
+ error("unable to initialize step launch listening "
"socket: %m");
return SLURM_ERROR;
}
@@ -1161,6 +1175,16 @@
hostset_destroy(all_nodes);
}
+/* This RPC will probably only be used on BlueGene/Q systems to signal the
+ * runjob process */
+static void
+_step_step_signal(struct step_launch_state *sls, slurm_msg_t *signal_msg)
+{
+ job_step_kill_msg_t *step_signal = signal_msg->data;
+ debug2("Signal %u requested for step %u.%u", step_signal->signal,
+ step_signal->job_id, step_signal->job_step_id);
+}
+
/*
* The TCP connection that was used to send the task_spawn_io_msg_t message
* will be used as the user managed IO stream. The remote end of the TCP stream
@@ -1269,6 +1293,11 @@
_step_missing_handler(sls, msg);
slurm_free_srun_step_missing_msg(msg->data);
break;
+ case SRUN_STEP_SIGNAL:
+ debug2("received step signal RPC");
+ _step_step_signal(sls, msg);
+ slurm_free_job_step_kill_msg(msg->data);
+ break;
case PMI_KVS_PUT_REQ:
debug2("PMI_KVS_PUT_REQ received");
rc = pmi_kvs_put((struct kvs_comm_set *) msg->data);
@@ -1285,7 +1314,7 @@
_task_user_managed_io_handler(sls, msg);
break;
default:
- error("received spurious message type: %d",
+ error("received spurious message type: %u",
msg->msg_type);
break;
}
@@ -1306,10 +1335,13 @@
slurm_msg_t req;
step_complete_msg_t msg;
int rc = -1;
- int nodeid = NO_VAL;
+ int nodeid = 0;
struct step_launch_state *sls = ctx->launch_state;
+#ifndef HAVE_FRONT_END
+ /* It is always 0 for front end systems */
nodeid = nodelist_find(ctx->step_resp->step_layout->node_list, node);
+#endif
pthread_mutex_lock(&sls->lock);
sls->abort = true;
@@ -1663,7 +1695,3 @@
pthread_mutex_unlock(&sls->lock);
return NULL;
}
-
-
-
-
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index c70a16d..9fece1e 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -19,7 +19,7 @@
extra_unsetenv_src =
endif
-INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
+INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) $(HWLOC_CPPFLAGS)
noinst_PROGRAMS = libcommon.o libeio.o libspank.o
# This is needed if compiling on windows
@@ -32,14 +32,15 @@
libspank.la
libcommon_la_SOURCES = \
- xcgroup_read_config.c xcgroup_read_config.h \
- xcgroup.c xcgroup.h \
- xcpuinfo.c xcpuinfo.h \
+ xcgroup_read_config.c xcgroup_read_config.h \
+ xcgroup.c xcgroup.h \
+ xcpuinfo.c xcpuinfo.h \
assoc_mgr.c assoc_mgr.h \
xmalloc.c xmalloc.h \
xassert.c xassert.h \
xstring.c xstring.h \
xsignal.c xsignal.h \
+ strnatcmp.c strnatcmp.h \
forward.c forward.h \
strlcpy.c strlcpy.h \
list.c list.h \
@@ -97,6 +98,7 @@
getopt.h getopt.c getopt1.c \
$(build_unsetenv_src) \
slurm_selecttype_info.c \
+ slurm_selecttype_info.h \
slurm_resource_info.c \
slurm_resource_info.h \
hostlist.c hostlist.h \
diff --git a/src/common/Makefile.in b/src/common/Makefile.in
index ec5b3b4..e3d12d9 100644
--- a/src/common/Makefile.in
+++ b/src/common/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -102,14 +102,14 @@
xcgroup_read_config.h xcgroup.c xcgroup.h xcpuinfo.c \
xcpuinfo.h assoc_mgr.c assoc_mgr.h xmalloc.c xmalloc.h \
xassert.c xassert.h xstring.c xstring.h xsignal.c xsignal.h \
- forward.c forward.h strlcpy.c strlcpy.h list.c list.h net.c \
- net.h log.c log.h cbuf.c cbuf.h safeopen.c safeopen.h \
- bitstring.c bitstring.h mpi.c mpi.h pack.c pack.h \
- parse_config.c parse_config.h parse_spec.c parse_spec.h \
- plugin.c plugin.h plugrack.c plugrack.h print_fields.c \
- print_fields.h read_config.c read_config.h node_select.c \
- node_select.h env.c env.h fd.c fd.h slurm_cred.h slurm_cred.c \
- slurm_errno.c slurm_priority.c slurm_priority.h \
+ strnatcmp.c strnatcmp.h forward.c forward.h strlcpy.c \
+ strlcpy.h list.c list.h net.c net.h log.c log.h cbuf.c cbuf.h \
+ safeopen.c safeopen.h bitstring.c bitstring.h mpi.c mpi.h \
+ pack.c pack.h parse_config.c parse_config.h parse_spec.c \
+ parse_spec.h plugin.c plugin.h plugrack.c plugrack.h \
+ print_fields.c print_fields.h read_config.c read_config.h \
+ node_select.c node_select.h env.c env.h fd.c fd.h slurm_cred.h \
+ slurm_cred.c slurm_errno.c slurm_priority.c slurm_priority.h \
slurm_protocol_api.c slurm_protocol_api.h \
slurm_protocol_pack.c slurm_protocol_pack.h \
slurm_protocol_util.c slurm_protocol_util.h \
@@ -127,35 +127,36 @@
slurm_topology.c slurm_topology.h switch.c switch.h arg_desc.c \
arg_desc.h macros.h malloc.c malloc.h getopt.h getopt.c \
getopt1.c unsetenv.c unsetenv.h slurm_selecttype_info.c \
- slurm_resource_info.c slurm_resource_info.h hostlist.c \
- hostlist.h slurm_step_layout.c slurm_step_layout.h \
- checkpoint.c checkpoint.h job_resources.c job_resources.h \
- parse_time.c parse_time.h job_options.c job_options.h \
- global_defaults.c timers.c timers.h slurm_xlator.h stepd_api.c \
- stepd_api.h write_labelled_message.c write_labelled_message.h \
- proc_args.c proc_args.h slurm_strcasestr.c slurm_strcasestr.h \
- node_conf.h node_conf.c gres.h gres.c
+ slurm_selecttype_info.h slurm_resource_info.c \
+ slurm_resource_info.h hostlist.c hostlist.h \
+ slurm_step_layout.c slurm_step_layout.h checkpoint.c \
+ checkpoint.h job_resources.c job_resources.h parse_time.c \
+ parse_time.h job_options.c job_options.h global_defaults.c \
+ timers.c timers.h slurm_xlator.h stepd_api.c stepd_api.h \
+ write_labelled_message.c write_labelled_message.h proc_args.c \
+ proc_args.h slurm_strcasestr.c slurm_strcasestr.h node_conf.h \
+ node_conf.c gres.h gres.c
@HAVE_UNSETENV_FALSE@am__objects_1 = unsetenv.lo
am_libcommon_la_OBJECTS = xcgroup_read_config.lo xcgroup.lo \
xcpuinfo.lo assoc_mgr.lo xmalloc.lo xassert.lo xstring.lo \
- xsignal.lo forward.lo strlcpy.lo list.lo net.lo log.lo cbuf.lo \
- safeopen.lo bitstring.lo mpi.lo pack.lo parse_config.lo \
- parse_spec.lo plugin.lo plugrack.lo print_fields.lo \
- read_config.lo node_select.lo env.lo fd.lo slurm_cred.lo \
- slurm_errno.lo slurm_priority.lo slurm_protocol_api.lo \
- slurm_protocol_pack.lo slurm_protocol_util.lo \
- slurm_protocol_socket_implementation.lo slurm_protocol_defs.lo \
- slurm_rlimits_info.lo slurmdb_defs.lo slurmdb_pack.lo \
- slurmdbd_defs.lo working_cluster.lo uid.lo util-net.lo \
- slurm_auth.lo jobacct_common.lo slurm_accounting_storage.lo \
- slurm_jobacct_gather.lo slurm_jobcomp.lo slurm_topology.lo \
- switch.lo arg_desc.lo malloc.lo getopt.lo getopt1.lo \
- $(am__objects_1) slurm_selecttype_info.lo \
- slurm_resource_info.lo hostlist.lo slurm_step_layout.lo \
- checkpoint.lo job_resources.lo parse_time.lo job_options.lo \
- global_defaults.lo timers.lo stepd_api.lo \
- write_labelled_message.lo proc_args.lo slurm_strcasestr.lo \
- node_conf.lo gres.lo
+ xsignal.lo strnatcmp.lo forward.lo strlcpy.lo list.lo net.lo \
+ log.lo cbuf.lo safeopen.lo bitstring.lo mpi.lo pack.lo \
+ parse_config.lo parse_spec.lo plugin.lo plugrack.lo \
+ print_fields.lo read_config.lo node_select.lo env.lo fd.lo \
+ slurm_cred.lo slurm_errno.lo slurm_priority.lo \
+ slurm_protocol_api.lo slurm_protocol_pack.lo \
+ slurm_protocol_util.lo slurm_protocol_socket_implementation.lo \
+ slurm_protocol_defs.lo slurm_rlimits_info.lo slurmdb_defs.lo \
+ slurmdb_pack.lo slurmdbd_defs.lo working_cluster.lo uid.lo \
+ util-net.lo slurm_auth.lo jobacct_common.lo \
+ slurm_accounting_storage.lo slurm_jobacct_gather.lo \
+ slurm_jobcomp.lo slurm_topology.lo switch.lo arg_desc.lo \
+ malloc.lo getopt.lo getopt1.lo $(am__objects_1) \
+ slurm_selecttype_info.lo slurm_resource_info.lo hostlist.lo \
+ slurm_step_layout.lo checkpoint.lo job_resources.lo \
+ parse_time.lo job_options.lo global_defaults.lo timers.lo \
+ stepd_api.lo write_labelled_message.lo proc_args.lo \
+ slurm_strcasestr.lo node_conf.lo gres.lo
am__EXTRA_libcommon_la_SOURCES_DIST = unsetenv.c unsetenv.h
libcommon_la_OBJECTS = $(am_libcommon_la_OBJECTS)
libcommon_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
@@ -283,6 +284,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -319,6 +321,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -412,7 +415,7 @@
@HAVE_UNSETENV_TRUE@build_unsetenv_src =
@HAVE_UNSETENV_FALSE@extra_unsetenv_src =
@HAVE_UNSETENV_TRUE@extra_unsetenv_src = unsetenv.c unsetenv.h
-INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
+INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) $(HWLOC_CPPFLAGS)
noinst_LTLIBRARIES = \
libcommon.la \
libdaemonize.la \
@@ -420,14 +423,15 @@
libspank.la
libcommon_la_SOURCES = \
- xcgroup_read_config.c xcgroup_read_config.h \
- xcgroup.c xcgroup.h \
- xcpuinfo.c xcpuinfo.h \
+ xcgroup_read_config.c xcgroup_read_config.h \
+ xcgroup.c xcgroup.h \
+ xcpuinfo.c xcpuinfo.h \
assoc_mgr.c assoc_mgr.h \
xmalloc.c xmalloc.h \
xassert.c xassert.h \
xstring.c xstring.h \
xsignal.c xsignal.h \
+ strnatcmp.c strnatcmp.h \
forward.c forward.h \
strlcpy.c strlcpy.h \
list.c list.h \
@@ -485,6 +489,7 @@
getopt.h getopt.c getopt1.c \
$(build_unsetenv_src) \
slurm_selecttype_info.c \
+ slurm_selecttype_info.h \
slurm_resource_info.c \
slurm_resource_info.h \
hostlist.c hostlist.h \
@@ -576,13 +581,13 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libcommon.la: $(libcommon_la_OBJECTS) $(libcommon_la_DEPENDENCIES)
+libcommon.la: $(libcommon_la_OBJECTS) $(libcommon_la_DEPENDENCIES) $(EXTRA_libcommon_la_DEPENDENCIES)
$(libcommon_la_LINK) $(libcommon_la_OBJECTS) $(libcommon_la_LIBADD) $(LIBS)
-libdaemonize.la: $(libdaemonize_la_OBJECTS) $(libdaemonize_la_DEPENDENCIES)
+libdaemonize.la: $(libdaemonize_la_OBJECTS) $(libdaemonize_la_DEPENDENCIES) $(EXTRA_libdaemonize_la_DEPENDENCIES)
$(LINK) $(libdaemonize_la_OBJECTS) $(libdaemonize_la_LIBADD) $(LIBS)
-libeio.la: $(libeio_la_OBJECTS) $(libeio_la_DEPENDENCIES)
+libeio.la: $(libeio_la_OBJECTS) $(libeio_la_DEPENDENCIES) $(EXTRA_libeio_la_DEPENDENCIES)
$(LINK) $(libeio_la_OBJECTS) $(libeio_la_LIBADD) $(LIBS)
-libspank.la: $(libspank_la_OBJECTS) $(libspank_la_DEPENDENCIES)
+libspank.la: $(libspank_la_OBJECTS) $(libspank_la_DEPENDENCIES) $(EXTRA_libspank_la_DEPENDENCIES)
$(LINK) $(libspank_la_OBJECTS) $(libspank_la_LIBADD) $(LIBS)
clean-noinstPROGRAMS:
@@ -661,6 +666,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurmdbd_defs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stepd_api.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strlcpy.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strnatcmp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/switch.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timers.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uid.Plo@am__quote@
@@ -799,10 +805,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c
index 69f5f0c..4e2f7c6 100644
--- a/src/common/assoc_mgr.c
+++ b/src/common/assoc_mgr.c
@@ -99,6 +99,7 @@
return SLURM_ERROR;
assoc1->usage->grp_used_cpus += assoc2->usage->grp_used_cpus;
+ assoc1->usage->grp_used_mem += assoc2->usage->grp_used_mem;
assoc1->usage->grp_used_nodes += assoc2->usage->grp_used_nodes;
assoc1->usage->grp_used_wall += assoc2->usage->grp_used_wall;
assoc1->usage->grp_used_cpu_run_secs +=
@@ -117,6 +118,7 @@
return SLURM_ERROR;
assoc->usage->grp_used_cpus = 0;
+ assoc->usage->grp_used_mem = 0;
assoc->usage->grp_used_nodes = 0;
assoc->usage->grp_used_cpu_run_secs = 0;
@@ -163,6 +165,7 @@
return SLURM_ERROR;
qos->usage->grp_used_cpus = 0;
+ qos->usage->grp_used_mem = 0;
qos->usage->grp_used_nodes = 0;
qos->usage->grp_used_cpu_run_secs = 0;
@@ -1320,12 +1323,15 @@
if (assoc_mgr_refresh_lists(db_conn, NULL) == SLURM_ERROR)
return SLURM_ERROR;
+ assoc_mgr_lock(&locks);
+
if ((!assoc_mgr_association_list
|| !list_count(assoc_mgr_association_list))
- && !(enforce & ACCOUNTING_ENFORCE_ASSOCS))
+ && !(enforce & ACCOUNTING_ENFORCE_ASSOCS)) {
+ assoc_mgr_unlock(&locks);
return SLURM_SUCCESS;
+ }
- assoc_mgr_lock(&locks);
itr = list_iterator_create(assoc_mgr_association_list);
while ((found_assoc = list_next(itr))) {
if (assoc->uid != found_assoc->uid) {
@@ -1340,12 +1346,12 @@
list_iterator_destroy(itr);
assoc_mgr_unlock(&locks);
- if (set)
- return SLURM_SUCCESS;
- else {
- debug("user %u does not have any associations", assoc->uid);
- return SLURM_ERROR;
+ if (!set) {
+ debug("UID %u has no associations", assoc->uid);
+ if (enforce & ACCOUNTING_ENFORCE_ASSOCS)
+ return SLURM_ERROR;
}
+ return SLURM_SUCCESS;
}
extern int assoc_mgr_fill_in_assoc(void *db_conn,
@@ -1508,6 +1514,7 @@
assoc->grp_cpu_run_mins= ret_assoc->grp_cpu_run_mins;
assoc->grp_cpus = ret_assoc->grp_cpus;
assoc->grp_jobs = ret_assoc->grp_jobs;
+ assoc->grp_mem = ret_assoc->grp_mem;
assoc->grp_nodes = ret_assoc->grp_nodes;
assoc->grp_submit_jobs = ret_assoc->grp_submit_jobs;
assoc->grp_wall = ret_assoc->grp_wall;
@@ -1595,11 +1602,13 @@
if (_get_assoc_mgr_user_list(db_conn, enforce) == SLURM_ERROR)
return SLURM_ERROR;
- if ((!assoc_mgr_user_list || !list_count(assoc_mgr_user_list))
- && !(enforce & ACCOUNTING_ENFORCE_ASSOCS))
- return SLURM_SUCCESS;
-
assoc_mgr_lock(&locks);
+ if ((!assoc_mgr_user_list || !list_count(assoc_mgr_user_list))
+ && !(enforce & ACCOUNTING_ENFORCE_ASSOCS)) {
+ assoc_mgr_unlock(&locks);
+ return SLURM_SUCCESS;
+ }
+
itr = list_iterator_create(assoc_mgr_user_list);
while ((found_user = list_next(itr))) {
if (user->uid != NO_VAL) {
@@ -1663,11 +1672,13 @@
if (_get_assoc_mgr_qos_list(db_conn, enforce) == SLURM_ERROR)
return SLURM_ERROR;
- if ((!assoc_mgr_qos_list || !list_count(assoc_mgr_qos_list))
- && !(enforce & ACCOUNTING_ENFORCE_QOS))
- return SLURM_SUCCESS;
-
assoc_mgr_lock(&locks);
+ if ((!assoc_mgr_qos_list || !list_count(assoc_mgr_qos_list))
+ && !(enforce & ACCOUNTING_ENFORCE_QOS)) {
+ assoc_mgr_unlock(&locks);
+ return SLURM_SUCCESS;
+ }
+
itr = list_iterator_create(assoc_mgr_qos_list);
while ((found_qos = list_next(itr))) {
if (qos->id == found_qos->id)
@@ -1699,6 +1710,7 @@
qos->grp_cpu_run_mins= found_qos->grp_cpu_run_mins;
qos->grp_cpus = found_qos->grp_cpus;
qos->grp_jobs = found_qos->grp_jobs;
+ qos->grp_mem = found_qos->grp_mem;
qos->grp_nodes = found_qos->grp_nodes;
qos->grp_submit_jobs = found_qos->grp_submit_jobs;
qos->grp_wall = found_qos->grp_wall;
@@ -1931,10 +1943,12 @@
if (_get_assoc_mgr_user_list(db_conn, 0) == SLURM_ERROR)
return SLURMDB_ADMIN_NOTSET;
- if (!assoc_mgr_user_list)
- return SLURMDB_ADMIN_NOTSET;
-
assoc_mgr_lock(&locks);
+ if (!assoc_mgr_user_list) {
+ assoc_mgr_unlock(&locks);
+ return SLURMDB_ADMIN_NOTSET;
+ }
+
itr = list_iterator_create(assoc_mgr_user_list);
while ((found_user = list_next(itr))) {
if (uid == found_user->uid)
@@ -1963,10 +1977,12 @@
if (_get_assoc_mgr_user_list(db_conn, 0) == SLURM_ERROR)
return false;
- if (!assoc_mgr_user_list)
- return false;
-
assoc_mgr_lock(&locks);
+ if (!assoc_mgr_user_list) {
+ assoc_mgr_unlock(&locks);
+ return false;
+ }
+
itr = list_iterator_create(assoc_mgr_user_list);
while ((found_user = list_next(itr))) {
if (uid == found_user->uid)
@@ -2121,6 +2137,9 @@
share->shares_norm = assoc->usage->shares_norm;
share->usage_raw = (uint64_t)assoc->usage->usage_raw;
+ share->grp_cpu_mins = assoc->grp_cpu_mins;
+ share->cpu_run_mins = assoc->usage->grp_used_cpu_run_secs / 60;
+
if (assoc->user) {
/* We only calculate user effective usage when
* we need it
@@ -2231,10 +2250,12 @@
assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK,
WRITE_LOCK, WRITE_LOCK, NO_LOCK };
- if (!assoc_mgr_association_list)
- return SLURM_SUCCESS;
-
assoc_mgr_lock(&locks);
+ if (!assoc_mgr_association_list) {
+ assoc_mgr_unlock(&locks);
+ return SLURM_SUCCESS;
+ }
+
itr = list_iterator_create(assoc_mgr_association_list);
while ((object = list_pop(update->objects))) {
bool update_jobs = false;
@@ -2332,6 +2353,10 @@
}
if (object->grp_jobs != NO_VAL)
rec->grp_jobs = object->grp_jobs;
+ if (object->grp_mem != NO_VAL) {
+ update_jobs = true;
+ rec->grp_mem = object->grp_mem;
+ }
if (object->grp_nodes != NO_VAL) {
update_jobs = true;
rec->grp_nodes = object->grp_nodes;
@@ -2636,10 +2661,12 @@
assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK,
NO_LOCK, WRITE_LOCK, WRITE_LOCK };
- if (!assoc_mgr_wckey_list)
- return SLURM_SUCCESS;
-
assoc_mgr_lock(&locks);
+ if (!assoc_mgr_wckey_list) {
+ assoc_mgr_unlock(&locks);
+ return SLURM_SUCCESS;
+ }
+
itr = list_iterator_create(assoc_mgr_wckey_list);
while ((object = list_pop(update->objects))) {
if (object->cluster && assoc_mgr_cluster_name) {
@@ -2755,10 +2782,12 @@
assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK,
NO_LOCK, WRITE_LOCK, WRITE_LOCK };
- if (!assoc_mgr_user_list)
- return SLURM_SUCCESS;
-
assoc_mgr_lock(&locks);
+ if (!assoc_mgr_user_list) {
+ assoc_mgr_unlock(&locks);
+ return SLURM_SUCCESS;
+ }
+
itr = list_iterator_create(assoc_mgr_user_list);
while ((object = list_pop(update->objects))) {
list_iterator_reset(itr);
@@ -2877,10 +2906,12 @@
assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK,
WRITE_LOCK, NO_LOCK, NO_LOCK };
- if (!assoc_mgr_qos_list)
- return SLURM_SUCCESS;
-
assoc_mgr_lock(&locks);
+ if (!assoc_mgr_qos_list) {
+ assoc_mgr_unlock(&locks);
+ return SLURM_SUCCESS;
+ }
+
itr = list_iterator_create(assoc_mgr_qos_list);
while ((object = list_pop(update->objects))) {
bool update_jobs = false;
@@ -2957,6 +2988,10 @@
}
if (object->grp_jobs != NO_VAL)
rec->grp_jobs = object->grp_jobs;
+ if (object->grp_mem != NO_VAL) {
+ update_jobs = true;
+ rec->grp_mem = object->grp_mem;
+ }
if (object->grp_nodes != NO_VAL) {
update_jobs = true;
rec->grp_nodes = object->grp_nodes;
@@ -3174,12 +3209,14 @@
if (assoc_mgr_refresh_lists(db_conn, NULL) == SLURM_ERROR)
return SLURM_ERROR;
+ assoc_mgr_lock(&locks);
if ((!assoc_mgr_association_list
|| !list_count(assoc_mgr_association_list))
- && !(enforce & ACCOUNTING_ENFORCE_ASSOCS))
+ && !(enforce & ACCOUNTING_ENFORCE_ASSOCS)) {
+ assoc_mgr_unlock(&locks);
return SLURM_SUCCESS;
+ }
- assoc_mgr_lock(&locks);
itr = list_iterator_create(assoc_mgr_association_list);
while ((found_assoc = list_next(itr))) {
if (assoc_id == found_assoc->id)
diff --git a/src/common/assoc_mgr.h b/src/common/assoc_mgr.h
index c280ee6..8924410 100644
--- a/src/common/assoc_mgr.h
+++ b/src/common/assoc_mgr.h
@@ -102,6 +102,8 @@
uint32_t grp_used_cpus; /* count of active jobs in the group
* (DON'T PACK) */
+ uint32_t grp_used_mem; /* count of active memory in the group
+ * (DON'T PACK) */
uint32_t grp_used_nodes; /* count of active jobs in the group
* (DON'T PACK) */
double grp_used_wall; /* group count of time used in
@@ -139,6 +141,8 @@
uint64_t grp_used_cpu_run_secs; /* count of running cpu secs
* (DON'T PACK) */
uint32_t grp_used_jobs; /* count of active jobs (DON'T PACK) */
+ uint32_t grp_used_mem; /* count of memory in use in this qos
+ * (DON'T PACK) */
uint32_t grp_used_nodes; /* count of nodes in use in this qos
* (DON'T PACK) */
uint32_t grp_used_submit_jobs; /* count of jobs pending or running
@@ -343,12 +347,16 @@
extern int dump_assoc_mgr_state(char *state_save_location);
/*
- * Read in the usage for association if the database
- * is up when starting.
+ * Read in the past usage for associations.
*/
extern int load_assoc_usage(char *state_save_location);
/*
+ * Read in the past usage for qos.
+ */
+extern int load_qos_usage(char *state_save_location);
+
+/*
* Read in the information of the association mgr if the database
* isn't up when starting.
*/
diff --git a/src/common/eio.c b/src/common/eio.c
index 82858c6..481e7a1 100644
--- a/src/common/eio.c
+++ b/src/common/eio.c
@@ -197,7 +197,8 @@
int eio_signal_shutdown(eio_handle_t *eio)
{
char c = 1;
- if (write(eio->fds[1], &c, sizeof(char)) != 1)
+
+ if (eio && (write(eio->fds[1], &c, sizeof(char)) != 1))
return error("eio_handle_signal_shutdown: write; %m");
return 0;
}
@@ -480,7 +481,7 @@
/*
* Add an eio_obj_t "obj" to an eio_handle_t "eio"'s internal object list.
*
- * This function can only be used to intialize "eio"'s list before
+ * This function can only be used to initialize "eio"'s list before
* calling eio_handle_mainloop. If it is used after the eio engine's
* mainloop has started, segfaults are likely.
*/
diff --git a/src/common/eio.h b/src/common/eio.h
index 12bdbe9..afee1fd 100644
--- a/src/common/eio.h
+++ b/src/common/eio.h
@@ -75,7 +75,7 @@
/*
* Add an eio_obj_t "obj" to an eio_handle_t "eio"'s internal object list.
*
- * This function can only be used to intialize "eio"'s list before
+ * This function can only be used to initialize "eio"'s list before
* calling eio_handle_mainloop. If it is used after the eio engine's
* mainloop has started, segfaults are likely.
*/
diff --git a/src/common/env.c b/src/common/env.c
index 3236e07..eddf3da 100644
--- a/src/common/env.c
+++ b/src/common/env.c
@@ -147,9 +147,10 @@
if (resv_id) {
setenvf(dest, "BASIL_RESERVATION_ID", "%u", resv_id);
} else {
- error("Can't set BASIL_RESERVATION_ID "
- "environment variable");
- rc = SLURM_FAILURE;
+ /* This is not an error for a SLURM job allocation with
+ * no compute nodes and no BASIL reservation */
+ verbose("Can't set BASIL_RESERVATION_ID "
+ "environment variable");
}
} else if (cluster_flags & CLUSTER_FLAG_AIX) {
@@ -707,10 +708,16 @@
rc = SLURM_FAILURE;
}
- if (env->stepid >= 0
- && setenvf(&env->env, "SLURM_STEPID", "%d", env->stepid)) {
- error("Unable to set SLURM_STEPID environment");
- rc = SLURM_FAILURE;
+ if (env->stepid >= 0) {
+ if (setenvf(&env->env, "SLURM_STEP_ID", "%d", env->stepid)) {
+ error("Unable to set SLURM_STEP_ID environment");
+ rc = SLURM_FAILURE;
+ }
+ /* and for backwards compatability... */
+ if (setenvf(&env->env, "SLURM_STEPID", "%d", env->stepid)) {
+ error("Unable to set SLURM_STEPID environment");
+ rc = SLURM_FAILURE;
+ }
}
if (!preserve_env && env->nhosts
@@ -906,6 +913,7 @@
* SLURM_JOB_NUM_NODES
* SLURM_JOB_NODELIST
* SLURM_JOB_CPUS_PER_NODE
+ * SLURM_NODE_ALIASES
* LOADLBATCH (AIX only)
* SLURM_BG_NUM_NODES, MPIRUN_PARTITION, MPIRUN_NOFREE, and
* MPIRUN_NOALLOCATE (BG only)
@@ -945,17 +953,19 @@
env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", node_cnt);
env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s",
alloc->node_list);
+ env_array_overwrite_fmt(dest, "SLURM_NODE_ALIASES", "%s",
+ alloc->alias_list);
_set_distribution(desc->task_dist, &dist, &lllp_dist);
- if(dist)
+ if (dist)
env_array_overwrite_fmt(dest, "SLURM_DISTRIBUTION", "%s",
dist);
- if(desc->task_dist == SLURM_DIST_PLANE)
+ if (desc->task_dist == SLURM_DIST_PLANE)
env_array_overwrite_fmt(dest, "SLURM_DIST_PLANESIZE",
"%u", desc->plane_size);
- if(lllp_dist)
+ if (lllp_dist)
env_array_overwrite_fmt(dest, "SLURM_DIST_LLLP", "%s",
lllp_dist);
@@ -1054,6 +1064,7 @@
* SLURM_JOB_NUM_NODES
* SLURM_JOB_NODELIST
* SLURM_JOB_CPUS_PER_NODE
+ * SLURM_NODE_ALIASES
* ENVIRONMENT=BATCH
* HOSTNAME
* LOADLBATCH (AIX only)
@@ -1095,6 +1106,8 @@
"%u", num_nodes);
env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes);
+ env_array_overwrite_fmt(dest, "SLURM_NODE_ALIASES", "%s",
+ batch->alias_list);
tmp = uint32_compressed_to_str(batch->num_cpu_groups,
batch->cpus_per_node,
diff --git a/src/common/forward.c b/src/common/forward.c
index 91aca3a..17375e0 100644
--- a/src/common/forward.c
+++ b/src/common/forward.c
@@ -175,7 +175,8 @@
}
if ((fwd_msg->header.msg_type == REQUEST_SHUTDOWN) ||
- (fwd_msg->header.msg_type == REQUEST_RECONFIGURE)) {
+ (fwd_msg->header.msg_type == REQUEST_RECONFIGURE) ||
+ (fwd_msg->header.msg_type == REQUEST_REBOOT_NODES)) {
slurm_mutex_lock(fwd_msg->forward_mutex);
ret_data_info = xmalloc(sizeof(ret_data_info_t));
list_push(fwd_msg->ret_list, ret_data_info);
@@ -542,7 +543,7 @@
ret_list = list_create(destroy_data_info);
- while((name = hostlist_shift(hl))) {
+ while ((name = hostlist_shift(hl))) {
pthread_attr_t attr_agent;
pthread_t thread_agent;
int retries = 0;
@@ -566,16 +567,16 @@
fwd_tree->tree_hl = hostlist_create(name);
free(name);
- for(j = 0; j < span[thr_count]; j++) {
+ for (j = 0; j < span[thr_count]; j++) {
name = hostlist_shift(hl);
- if(!name)
+ if (!name)
break;
hostlist_push(fwd_tree->tree_hl, name);
free(name);
}
- while(pthread_create(&thread_agent, &attr_agent,
- _fwd_tree_thread, (void *)fwd_tree)) {
+ while (pthread_create(&thread_agent, &attr_agent,
+ _fwd_tree_thread, (void *)fwd_tree)) {
error("pthread_create error %m");
if (++retries > MAX_RETRIES)
fatal("Can't create pthread");
@@ -590,7 +591,7 @@
count = list_count(ret_list);
debug2("Tree head got back %d looking for %d", count, host_count);
- while((count < host_count)) {
+ while ((count < host_count)) {
pthread_cond_wait(¬ify, &tree_mutex);
count = list_count(ret_list);
debug2("Tree head got back %d", count);
diff --git a/src/common/gres.c b/src/common/gres.c
index 0f2e45a..a6f6457 100644
--- a/src/common/gres.c
+++ b/src/common/gres.c
@@ -91,6 +91,8 @@
void *gres_ptr );
void (*step_set_env) ( char ***job_env_ptr,
void *gres_ptr );
+ void (*send_stepd) ( int fd );
+ void (*recv_stepd) ( int fd );
} slurm_gres_ops_t;
/* Gres plugin context, one for each gres type */
@@ -122,6 +124,7 @@
static char *gres_plugin_list = NULL;
static pthread_mutex_t gres_context_lock = PTHREAD_MUTEX_INITIALIZER;
static List gres_conf_list = NULL;
+static bool init_run = false;
/* Local functions */
static gres_node_state_t *
@@ -137,7 +140,8 @@
static void _gres_job_list_delete(void *list_element);
extern int _job_alloc(void *job_gres_data, void *node_gres_data,
int node_cnt, int node_offset, uint32_t cpu_cnt,
- char *gres_name, uint32_t job_id, char *node_name);
+ char *gres_name, uint32_t job_id, char *node_name,
+ bitstr_t *core_bitmap);
static int _job_config_validate(char *config, uint32_t *gres_cnt,
slurm_gres_context_t *context_ptr);
static int _job_dealloc(void *job_gres_data, void *node_gres_data,
@@ -239,6 +243,8 @@
"node_config_load",
"job_set_env",
"step_set_env",
+ "send_stepd",
+ "recv_stepd",
};
int n_syms = sizeof(syms) / sizeof(char *);
@@ -261,9 +267,8 @@
return SLURM_ERROR;
}
- verbose("gres: Couldn't find the specified plugin name for %s "
- "looking at all files",
- plugin_context->gres_type);
+ debug("gres: Couldn't find the specified plugin name for %s looking "
+ "at all files", plugin_context->gres_type);
/* Get plugin list */
if (plugin_context->plugin_list == NULL) {
@@ -286,8 +291,8 @@
plugin_context->plugin_list,
plugin_context->gres_type );
if (plugin_context->cur_plugin == PLUGIN_INVALID_HANDLE) {
- verbose("Cannot find plugin of type %s",
- plugin_context->gres_type);
+ debug("Cannot find plugin of type %s, just track gres counts",
+ plugin_context->gres_type);
return SLURM_ERROR;
}
@@ -334,6 +339,9 @@
int i, j, rc = SLURM_SUCCESS;
char *last = NULL, *names, *one_name, *full_name;
+ if (init_run && (gres_context_cnt >= 0))
+ return rc;
+
slurm_mutex_lock(&gres_context_lock);
if (slurm_get_debug_flags() & DEBUG_FLAG_GRES)
gres_debug = true;
@@ -399,6 +407,7 @@
gres_context[i].gres_name_colon_len =
strlen(gres_context[i].gres_name_colon);
}
+ init_run = true;
fini: slurm_mutex_unlock(&gres_context_lock);
return rc;
@@ -417,6 +426,7 @@
if (gres_context_cnt < 0)
goto fini;
+ init_run = false;
for (i=0; i<gres_context_cnt; i++) {
j = _unload_gres_plugin(gres_context + i);
if (j != SLURM_SUCCESS)
@@ -696,6 +706,10 @@
fatal("Invalid gres data for %s, Count does not match "
"File value", p->name);
}
+ if ((tmp_long < 0) || (tmp_long >= NO_VAL)) {
+ fatal("Gres %s has invalid count value %ld",
+ p->name, tmp_long);
+ }
p->count = tmp_long;
xfree(tmp_str);
} else if (p->count == 0)
@@ -1906,9 +1920,9 @@
if (gres_node_ptr->topo_cpus_bitmap[i]) {
bit_fmt(tmp_str, sizeof(tmp_str),
gres_node_ptr->topo_cpus_bitmap[i]);
- info(" topo_cpu_bitmap[%d]:%s", i, tmp_str);
+ info(" topo_cpus_bitmap[%d]:%s", i, tmp_str);
} else
- info(" topo_cpu_bitmap[%d]:NULL", i);
+ info(" topo_cpus_bitmap[%d]:NULL", i);
if (gres_node_ptr->topo_cpus_bitmap[i]) {
bit_fmt(tmp_str, sizeof(tmp_str),
gres_node_ptr->topo_gres_bitmap[i]);
@@ -2676,7 +2690,8 @@
extern int _job_alloc(void *job_gres_data, void *node_gres_data,
int node_cnt, int node_offset, uint32_t cpu_cnt,
- char *gres_name, uint32_t job_id, char *node_name)
+ char *gres_name, uint32_t job_id, char *node_name,
+ bitstr_t *core_bitmap)
{
int i;
uint32_t gres_cnt;
@@ -2773,7 +2788,17 @@
job_gres_ptr->gres_bit_alloc[node_offset] &&
node_gres_ptr->topo_gres_bitmap &&
node_gres_ptr->topo_gres_cnt_alloc) {
- for (i=0; i<node_gres_ptr->topo_cnt; i++) {
+ for (i = 0; i < node_gres_ptr->topo_cnt; i++) {
+ /* Insure that if specific CPUs are associated with
+ * specific GRES and the CPU count matches the
+ * slurmctld configuration that we only use the GRES
+ * on the CPUs that have already been allocated. */
+ if (core_bitmap &&
+ (bit_size(core_bitmap) ==
+ bit_size(node_gres_ptr->topo_cpus_bitmap[i])) &&
+ !bit_overlap(core_bitmap,
+ node_gres_ptr->topo_cpus_bitmap[i]))
+ continue;
gres_cnt = bit_overlap(job_gres_ptr->
gres_bit_alloc[node_offset],
node_gres_ptr->
@@ -2795,12 +2820,14 @@
* IN cpu_cnt - number of CPUs allocated to this job on this node
* IN job_id - job's ID (for logging)
* IN node_name - name of the node (for logging)
+ * IN core_bitmap - cores allocated to this job on this node (NULL if not
+ * available)
* RET SLURM_SUCCESS or error code
*/
extern int gres_plugin_job_alloc(List job_gres_list, List node_gres_list,
int node_cnt, int node_offset,
uint32_t cpu_cnt, uint32_t job_id,
- char *node_name)
+ char *node_name, bitstr_t *core_bitmap)
{
int i, rc, rc2;
ListIterator job_gres_iter, node_gres_iter;
@@ -2849,7 +2876,8 @@
rc2 = _job_alloc(job_gres_ptr->gres_data,
node_gres_ptr->gres_data, node_cnt,
node_offset, cpu_cnt,
- gres_context[i].gres_name, job_id, node_name);
+ gres_context[i].gres_name, job_id, node_name,
+ core_bitmap);
if (rc2 != SLURM_SUCCESS)
rc = rc2;
}
@@ -4489,3 +4517,36 @@
slurm_mutex_unlock(&gres_context_lock);
}
+/* Send GRES information to slurmstepd on the specified file descriptor*/
+extern void gres_plugin_send_stepd(int fd)
+{
+ int i;
+
+ (void) gres_plugin_init();
+
+ slurm_mutex_lock(&gres_context_lock);
+ for (i = 0; i < gres_context_cnt; i++) {
+ if (gres_context[i].ops.send_stepd == NULL)
+ continue; /* No plugin to call */
+ (*(gres_context[i].ops.send_stepd)) (fd);
+ break;
+ }
+ slurm_mutex_unlock(&gres_context_lock);
+}
+
+/* Receive GRES information from slurmd on the specified file descriptor*/
+extern void gres_plugin_recv_stepd(int fd)
+{
+ int i;
+
+ (void) gres_plugin_init();
+
+ slurm_mutex_lock(&gres_context_lock);
+ for (i = 0; i < gres_context_cnt; i++) {
+ if (gres_context[i].ops.recv_stepd == NULL)
+ continue; /* No plugin to call */
+ (*(gres_context[i].ops.recv_stepd)) (fd);
+ break;
+ }
+ slurm_mutex_unlock(&gres_context_lock);
+}
diff --git a/src/common/gres.h b/src/common/gres.h
index fd53b26..7ba0ccc 100644
--- a/src/common/gres.h
+++ b/src/common/gres.h
@@ -200,6 +200,12 @@
extern void gres_plugin_step_state_file(List gres_list, int *gres_bit_alloc,
int *gres_count);
+/* Send GRES information to slurmstepd on the specified file descriptor*/
+extern void gres_plugin_send_stepd(int fd);
+
+/* Receive GRES information from slurmd on the specified file descriptor*/
+extern void gres_plugin_recv_stepd(int fd);
+
/*
**************************************************************************
* PLUGIN CALLS FOR SLURMCTLD DAEMON *
@@ -378,12 +384,14 @@
* IN cpu_cnt - number of CPUs allocated to this job on this node
* IN job_id - job's ID (for logging)
* IN node_name - name of the node (for logging)
+ * IN core_bitmap - cores allocated to this job on this node (NULL if not
+ * available)
* RET SLURM_SUCCESS or error code
*/
extern int gres_plugin_job_alloc(List job_gres_list, List node_gres_list,
int node_cnt, int node_offset,
uint32_t cpu_cnt, uint32_t job_id,
- char *node_name);
+ char *node_name, bitstr_t *core_bitmap);
/*
* Deallocate resource from a job and update node and job gres information
diff --git a/src/common/hostlist.c b/src/common/hostlist.c
index d02bdd6..7fdc378 100644
--- a/src/common/hostlist.c
+++ b/src/common/hostlist.c
@@ -1136,8 +1136,56 @@
* If hostrange and hostname prefixes don't match, then
* there is way the hostname falls within the range [hr].
*/
- if (strcmp(hr->prefix, hn->prefix) != 0)
- return 0;
+ if (strcmp(hr->prefix, hn->prefix) != 0) {
+ int len1, len2, ldiff;
+ int dims = slurmdb_setup_cluster_name_dims();
+
+ if (dims != 1)
+ return 0;
+
+ /* Below logic was added since primarily for a cray
+ * where people typically drop
+ * leading zeros into the prefix so you can do
+ * something like nid0000[2-7]. But doing this messes
+ * up the hostlist_find since when someone queries
+ * against nid00002 the prefixes don't match. The
+ * below code is there to make sure get the best
+ * chance for comparison.
+ */
+
+ /* First see if by taking some of the leading digits of the
+ * suffix of hn and moving it to the end of the prefix if it
+ * would be a match.
+ */
+ len1 = strlen(hr->prefix);
+ len2 = strlen(hn->prefix);
+ ldiff = len1 - len2;
+
+ if (ldiff > 0 && isdigit(hr->prefix[len1-1])
+ && (strlen(hn->suffix) >= ldiff)) {
+ char *p = '\0';
+ /* Tack on ldiff of the hostname's suffix to that of
+ * it's prefix */
+ hn->prefix = realloc(hn->prefix, len2+ldiff+1);
+ strncat(hn->prefix, hn->suffix, ldiff);
+ /* Now adjust the suffix of the hostname object. */
+ hn->suffix += ldiff;
+ /* And the numeric representation just incase
+ * whatever we just tacked on to the prefix
+ * had something other than 0 in it.
+ *
+ * Since we are only going through this logic for
+ * single dimension systems we will always use
+ * the base 10.
+ */
+ hn->num = strtoul(hn->suffix, &p, 10);
+
+ /* Now compare them and see if they match */
+ if (strcmp(hr->prefix, hn->prefix) != 0)
+ return 0;
+ } else
+ return 0;
+ }
/*
* Finally, check whether [hn], with a valid numeric suffix,
diff --git a/src/common/hostlist.h b/src/common/hostlist.h
index 80d9159..be6539f 100644
--- a/src/common/hostlist.h
+++ b/src/common/hostlist.h
@@ -64,6 +64,12 @@
#endif
#define HIGHEST_BASE 36
+#define FREE_NULL_HOSTLIST(_X) \
+ do { \
+ if (_X) hostlist_destroy (_X); \
+ _X = NULL; \
+ } while (0)
+
extern char *alpha_num;
/* Notes:
diff --git a/src/common/job_resources.c b/src/common/job_resources.c
index dfd6fb9..7a6195e 100644
--- a/src/common/job_resources.c
+++ b/src/common/job_resources.c
@@ -1130,6 +1130,53 @@
return set_cnt;
}
+/* Return a copy of core_bitmap only for the specific node */
+extern bitstr_t * copy_job_resources_node(job_resources_t *job_resrcs_ptr,
+ uint32_t node_id)
+{
+ int i, bit_inx = 0, core_cnt = 0;
+ bitstr_t *core_bitmap;
+
+ xassert(job_resrcs_ptr);
+
+ for (i = 0; i < job_resrcs_ptr->nhosts; i++) {
+ if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
+ bit_inx += job_resrcs_ptr->sockets_per_node[i] *
+ job_resrcs_ptr->cores_per_socket[i] *
+ job_resrcs_ptr->sock_core_rep_count[i];
+ node_id -= job_resrcs_ptr->sock_core_rep_count[i];
+ } else {
+ bit_inx += job_resrcs_ptr->sockets_per_node[i] *
+ job_resrcs_ptr->cores_per_socket[i] *
+ node_id;
+ core_cnt = job_resrcs_ptr->sockets_per_node[i] *
+ job_resrcs_ptr->cores_per_socket[i];
+ break;
+ }
+ }
+ if (core_cnt < 1) {
+ error("copy_job_resources_node: core_cnt=0");
+ return NULL;
+ }
+
+ i = bit_size(job_resrcs_ptr->core_bitmap);
+ if ((bit_inx + core_cnt) > i) {
+ error("copy_job_resources_node: offset > bitmap size "
+ "(%d >= %d)", (bit_inx + core_cnt), i);
+ return NULL;
+ }
+
+ core_bitmap = bit_alloc(core_cnt);
+ if (!core_bitmap)
+ fatal("copy_job_resources_node: bit_alloc(%d): %m", core_cnt);
+ for (i = 0; i < core_cnt; i++) {
+ if (bit_test(job_resrcs_ptr->core_bitmap, bit_inx++))
+ bit_set(core_bitmap, i);
+ }
+
+ return core_bitmap;
+}
+
extern int get_job_resources_cnt(job_resources_t *job_resrcs_ptr,
uint32_t node_id, uint16_t *socket_cnt,
uint16_t *cores_per_socket_cnt)
diff --git a/src/common/job_resources.h b/src/common/job_resources.h
index d88f24d..83a4a99 100644
--- a/src/common/job_resources.h
+++ b/src/common/job_resources.h
@@ -230,6 +230,10 @@
extern int count_job_resources_node(job_resources_t *job_resrcs_ptr,
uint32_t node_id);
+/* Return a copy of core_bitmap only for the specific node */
+extern bitstr_t * copy_job_resources_node(job_resources_t *job_resrcs_ptr,
+ uint32_t node_id);
+
/* Get socket and core count for a specific node_id (zero origin) */
extern int get_job_resources_cnt(job_resources_t *job_resrcs_ptr,
uint32_t node_id, uint16_t *socket_cnt,
diff --git a/src/common/jobacct_common.c b/src/common/jobacct_common.c
index 987d71a..1d54615 100644
--- a/src/common/jobacct_common.c
+++ b/src/common/jobacct_common.c
@@ -39,7 +39,9 @@
* Copyright (C) 2002 The Regents of the University of California.
\*****************************************************************************/
-#include "jobacct_common.h"
+#include <sys/resource.h>
+
+#include "src/common/jobacct_common.h"
/*
** Define slurm-specific aliases for use by plugins, see slurm_xlator.h
@@ -60,8 +62,13 @@
static void _pack_jobacct_id(jobacct_id_t *jobacct_id,
uint16_t rpc_version, Buf buffer)
{
- pack32((uint32_t)jobacct_id->nodeid, buffer);
- pack16((uint16_t)jobacct_id->taskid, buffer);
+ if (jobacct_id) {
+ pack32((uint32_t) jobacct_id->nodeid, buffer);
+ pack16((uint16_t) jobacct_id->taskid, buffer);
+ } else {
+ pack32((uint32_t) 0, buffer);
+ pack16((uint16_t) 0, buffer);
+ }
}
static int _unpack_jobacct_id(jobacct_id_t *jobacct_id,
@@ -77,7 +84,7 @@
extern int jobacct_common_init_struct(struct jobacctinfo *jobacct,
jobacct_id_t *jobacct_id)
{
- if(!jobacct_id) {
+ if (!jobacct_id) {
jobacct_id_t temp_id;
temp_id.taskid = (uint16_t)NO_VAL;
temp_id.nodeid = (uint32_t)NO_VAL;
@@ -266,45 +273,48 @@
struct jobacctinfo *from)
{
xassert(dest);
- xassert(from);
+
+ if (!from)
+ return;
slurm_mutex_lock(&jobacct_lock);
- if(dest->max_vsize < from->max_vsize) {
+ if (dest->max_vsize < from->max_vsize) {
dest->max_vsize = from->max_vsize;
dest->max_vsize_id = from->max_vsize_id;
}
dest->tot_vsize += from->tot_vsize;
- if(dest->max_rss < from->max_rss) {
+ if (dest->max_rss < from->max_rss) {
dest->max_rss = from->max_rss;
dest->max_rss_id = from->max_rss_id;
}
dest->tot_rss += from->tot_rss;
- if(dest->max_pages < from->max_pages) {
+ if (dest->max_pages < from->max_pages) {
dest->max_pages = from->max_pages;
dest->max_pages_id = from->max_pages_id;
}
dest->tot_pages += from->tot_pages;
- if((dest->min_cpu > from->min_cpu)
- || (dest->min_cpu == (uint32_t)NO_VAL)) {
- if(from->min_cpu == (uint32_t)NO_VAL)
+
+ if ((dest->min_cpu > from->min_cpu)
+ || (dest->min_cpu == (uint32_t)NO_VAL)) {
+ if (from->min_cpu == (uint32_t)NO_VAL)
from->min_cpu = 0;
dest->min_cpu = from->min_cpu;
dest->min_cpu_id = from->min_cpu_id;
}
dest->tot_cpu += from->tot_cpu;
- if(dest->max_vsize_id.taskid == (uint16_t)NO_VAL)
+ if (dest->max_vsize_id.taskid == (uint16_t)NO_VAL)
dest->max_vsize_id = from->max_vsize_id;
- if(dest->max_rss_id.taskid == (uint16_t)NO_VAL)
+ if (dest->max_rss_id.taskid == (uint16_t)NO_VAL)
dest->max_rss_id = from->max_rss_id;
- if(dest->max_pages_id.taskid == (uint16_t)NO_VAL)
+ if (dest->max_pages_id.taskid == (uint16_t)NO_VAL)
dest->max_pages_id = from->max_pages_id;
- if(dest->min_cpu_id.taskid == (uint16_t)NO_VAL)
+ if (dest->min_cpu_id.taskid == (uint16_t)NO_VAL)
dest->min_cpu_id = from->min_cpu_id;
dest->user_cpu_sec += from->user_cpu_sec;
@@ -351,15 +361,16 @@
extern void jobacct_common_pack(struct jobacctinfo *jobacct,
uint16_t rpc_version, Buf buffer)
{
- int i=0;
+ int i = 0;
- if(!jobacct) {
- for(i=0; i<16; i++)
+ if (!jobacct) {
+ for (i = 0; i < 12; i++)
pack32((uint32_t) 0, buffer);
- for(i=0; i<4; i++)
- pack16((uint16_t) 0, buffer);
+ for (i = 0; i < 4; i++)
+ _pack_jobacct_id(NULL, rpc_version, buffer);
return;
}
+
slurm_mutex_lock(&jobacct_lock);
pack32((uint32_t)jobacct->user_cpu_sec, buffer);
pack32((uint32_t)jobacct->user_cpu_usec, buffer);
@@ -373,6 +384,7 @@
pack32((uint32_t)jobacct->tot_pages, buffer);
pack32((uint32_t)jobacct->min_cpu, buffer);
pack32((uint32_t)jobacct->tot_cpu, buffer);
+
_pack_jobacct_id(&jobacct->max_vsize_id, rpc_version, buffer);
_pack_jobacct_id(&jobacct->max_rss_id, rpc_version, buffer);
_pack_jobacct_id(&jobacct->max_pages_id, rpc_version, buffer);
@@ -385,6 +397,7 @@
uint16_t rpc_version, Buf buffer)
{
uint32_t uint32_tmp;
+
*jobacct = xmalloc(sizeof(struct jobacctinfo));
safe_unpack32(&uint32_tmp, buffer);
(*jobacct)->user_cpu_sec = uint32_tmp;
@@ -402,22 +415,26 @@
safe_unpack32(&(*jobacct)->tot_pages, buffer);
safe_unpack32(&(*jobacct)->min_cpu, buffer);
safe_unpack32(&(*jobacct)->tot_cpu, buffer);
- if(_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version, buffer)
- != SLURM_SUCCESS)
+
+ if (_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version, buffer)
+ != SLURM_SUCCESS)
goto unpack_error;
- if(_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version, buffer)
- != SLURM_SUCCESS)
+ if (_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version, buffer)
+ != SLURM_SUCCESS)
goto unpack_error;
- if(_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version, buffer)
- != SLURM_SUCCESS)
+ if (_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version, buffer)
+ != SLURM_SUCCESS)
goto unpack_error;
- if(_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version, buffer)
- != SLURM_SUCCESS)
+ if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version, buffer)
+ != SLURM_SUCCESS)
goto unpack_error;
return SLURM_SUCCESS;
unpack_error:
+ debug2("jobacct_common_unpack:"
+ "unpack_error: size_buf(buffer) %u",
+ size_buf(buffer));
xfree(*jobacct);
return SLURM_ERROR;
}
diff --git a/src/common/jobacct_common.h b/src/common/jobacct_common.h
index be20d37..13a5f0c 100644
--- a/src/common/jobacct_common.h
+++ b/src/common/jobacct_common.h
@@ -62,6 +62,7 @@
#include "src/common/list.h"
#include "src/common/xstring.h"
#include "src/common/node_select.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
#include <ctype.h>
@@ -72,6 +73,7 @@
typedef struct {
uint16_t taskid; /* contains which task number it was on */
uint32_t nodeid; /* contains which node number it was on */
+ slurmd_job_t *job; /* contains slurmd job pointer */
} jobacct_id_t;
struct jobacctinfo {
@@ -94,8 +96,7 @@
(used to figure out ave later) */
uint32_t min_cpu; /* min cpu time */
jobacct_id_t min_cpu_id; /* contains which task it was on */
- uint32_t tot_cpu; /* total cpu time
- (used to figure out ave later) */
+ uint32_t tot_cpu; /* total cpu time(used to figure out ave later) */
};
/* Define jobacctinfo_t below to avoid including extraneous slurm headers */
diff --git a/src/common/mpi.c b/src/common/mpi.c
index 8b3a295..699d343 100644
--- a/src/common/mpi.c
+++ b/src/common/mpi.c
@@ -51,7 +51,6 @@
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
-
/*
* WARNING: Do not change the order of these fields or add additional
* fields at the beginning of the structure. If you do, MPI plugins
@@ -59,6 +58,8 @@
* at the end of the structure.
*/
typedef struct slurm_mpi_ops {
+ int (*slurmstepd_prefork)(const slurmd_job_t *job,
+ char ***env);
int (*slurmstepd_init) (const mpi_plugin_task_info_t *job,
char ***env);
mpi_plugin_client_state_t *
@@ -78,6 +79,7 @@
static slurm_mpi_context_t g_context = NULL;
static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER;
+static bool init_run = false;
static slurm_mpi_context_t
_slurm_mpi_context_create(const char *mpi_type)
@@ -142,6 +144,7 @@
* declared for slurm_mpi_ops_t.
*/
static const char *syms[] = {
+ "p_mpi_hook_slurmstepd_prefork",
"p_mpi_hook_slurmstepd_task",
"p_mpi_hook_client_prelaunch",
"p_mpi_hook_client_single_task_per_node",
@@ -209,6 +212,9 @@
char *full_type = NULL;
int got_default = 0;
+ if ( init_run && g_context )
+ return retval;
+
slurm_mutex_lock( &context_lock );
if ( g_context )
@@ -242,7 +248,7 @@
g_context = NULL;
retval = SLURM_ERROR;
}
-
+ init_run = true;
done:
if(got_default)
@@ -265,6 +271,14 @@
return SLURM_SUCCESS;
}
+int mpi_hook_slurmstepd_prefork (const slurmd_job_t *job, char ***env)
+{
+ if (mpi_hook_slurmstepd_init(env) == SLURM_ERROR)
+ return SLURM_ERROR;
+
+ return (*(g_context->ops.slurmstepd_prefork))(job, env);
+}
+
int mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job, char ***env)
{
if (mpi_hook_slurmstepd_init(env) == SLURM_ERROR)
@@ -322,6 +336,7 @@
if (!g_context)
return SLURM_SUCCESS;
+ init_run = false;
rc = _slurm_mpi_context_destroy(g_context);
return rc;
}
diff --git a/src/common/mpi.h b/src/common/mpi.h
index 37d1b08..0be96ed 100644
--- a/src/common/mpi.h
+++ b/src/common/mpi.h
@@ -46,6 +46,8 @@
#include <stdbool.h>
#include "slurm/slurm.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+
typedef struct slurm_mpi_context *slurm_mpi_context_t;
typedef void mpi_plugin_client_state_t;
@@ -103,6 +105,9 @@
*/
int mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job, char ***env);
+
+int mpi_hook_slurmstepd_prefork (const slurmd_job_t *job, char ***env);
+
/**********************************************************************
* Hooks called by client applications.
* For instance: srun, slaunch, slurm_step_launch().
diff --git a/src/common/node_conf.c b/src/common/node_conf.c
index 80aa1c3..ca16593 100644
--- a/src/common/node_conf.c
+++ b/src/common/node_conf.c
@@ -236,7 +236,7 @@
node_rec->reason = xstrdup(node_ptr->reason);
} else {
/* FIXME - maybe should be fatal? */
- error("reconfiguration for node %s, ignoring!", alias);
+ error("Reconfiguration for node %s, ignoring!", alias);
}
free(alias);
}
@@ -967,7 +967,10 @@
}
}
if (i >= NODE_STATE_END) {
- if (strncasecmp("DRAIN", state_str, 5) == 0)
+ if (strncasecmp("CLOUD", state_str, 5) == 0)
+ state_val = NODE_STATE_IDLE | NODE_STATE_CLOUD |
+ NODE_STATE_POWER_SAVE;
+ else if (strncasecmp("DRAIN", state_str, 5) == 0)
state_val = NODE_STATE_UNKNOWN | NODE_STATE_DRAIN;
else if (strncasecmp("FAIL", state_str, 4) == 0)
state_val = NODE_STATE_IDLE | NODE_STATE_FAIL;
diff --git a/src/common/node_select.c b/src/common/node_select.c
index e999f8a..9efd6a0 100644
--- a/src/common/node_select.c
+++ b/src/common/node_select.c
@@ -66,6 +66,7 @@
static slurm_select_context_t * select_context = NULL;
static pthread_mutex_t select_context_lock =
PTHREAD_MUTEX_INITIALIZER;
+static bool init_run = false;
/*
* Locate and load the appropriate plugin
@@ -117,6 +118,7 @@
"select_p_select_jobinfo_xstrdup",
"select_p_update_block",
"select_p_update_sub_node",
+ "select_p_fail_cnode",
"select_p_get_info_from_plugin",
"select_p_update_node_config",
"select_p_update_node_state",
@@ -238,7 +240,7 @@
{
int dim;
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
- uint16_t cluster_dims = slurmdb_setup_cluster_name_dims();
+ uint16_t cluster_dims = slurmdb_setup_cluster_dims();
if (ba_request == NULL){
error("print_ba_request Error, request is NULL");
@@ -281,6 +283,9 @@
struct dirent *e;
char *dir_array = NULL, *head = NULL;
+ if ( init_run && select_context )
+ return retval;
+
slurm_mutex_lock( &select_context_lock );
if ( select_context )
@@ -449,6 +454,7 @@
}
}
+ init_run = true;
done:
slurm_mutex_unlock( &select_context_lock );
@@ -465,6 +471,7 @@
if (!select_context)
goto fini;
+ init_run = false;
for (i=0; i<select_context_cnt; i++) {
j = _select_context_destroy(select_context + i);
if (j != SLURM_SUCCESS)
@@ -1198,6 +1205,20 @@
}
/*
+ * Fail certain cnodes in a block's midplane (usually comes from the
+ * IBM runjob mux)
+ * IN step_ptr - step that has failed cnodes
+ */
+extern int select_g_fail_cnode (struct step_record *step_ptr)
+{
+ if (slurm_select_init(0) < 0)
+ return SLURM_ERROR;
+
+ return (*(select_context[select_context_default].ops.
+ fail_cnode))(step_ptr);
+}
+
+/*
* Get select data from a plugin
* IN dinfo - type of data to get from the node record
* (see enum select_plugindata_info)
@@ -1285,18 +1306,11 @@
*/
extern bitstr_t * select_g_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt)
{
-#if 0
- /* Wait for Danny to checkin select/bgq logic before using new plugin
- * function calls. The select_p_resv_test() function is currently only
- * available in select/linear and select/cons_res */
if (slurm_select_init(0) < 0)
return NULL;
return (*(select_context[select_context_default].ops.resv_test))
(avail_bitmap, node_cnt);
-#else
- return bit_pick_cnt(avail_bitmap, node_cnt);
-#endif
}
extern void select_g_ba_init(node_info_msg_t *node_info_ptr, bool sanity_check)
diff --git a/src/common/node_select.h b/src/common/node_select.h
index e55b58b..eeab365 100644
--- a/src/common/node_select.h
+++ b/src/common/node_select.h
@@ -117,6 +117,13 @@
block */
int start_req; /* state there was a start
request */
+ bool full_check; /* This request is to check all
+ * nodes and wires no matter
+ * what. Primarily added to
+ * handle when a nodeboard
+ * goes down to avoid using
+ * the midplane for
+ * passthrough. */
} select_ba_request_t;
/*
@@ -203,6 +210,7 @@
*block_desc_ptr);
int (*update_sub_node) (update_block_msg_t
*block_desc_ptr);
+ int (*fail_cnode) (struct step_record *step_ptr);
int (*get_info_from_plugin) (enum
select_plugindata_info dinfo,
struct job_record *job_ptr,
@@ -401,6 +409,13 @@
*/
extern int select_g_update_block (update_block_msg_t *block_desc_ptr);
+/*
+ * Fail certain cnodes in a block's midplane (usually comes from the
+ * IBM runjob mux)
+ * IN step_ptr - step with failed cnodes
+ */
+extern int select_g_fail_cnode (struct step_record *step_ptr);
+
/******************************************************\
* JOB SPECIFIC SELECT CREDENTIAL MANAGEMENT FUNCIONS *
\******************************************************/
diff --git a/src/common/pack.h b/src/common/pack.h
index f863382..80b9483 100644
--- a/src/common/pack.h
+++ b/src/common/pack.h
@@ -269,7 +269,7 @@
#define safe_packstr(str,max_len,buf) do { \
uint32_t _size; \
assert(buf->magic == BUF_MAGIC); \
- assert(sizeof(*max_len) == sizeof(uint32_t)); \
+ assert(sizeof(max_len) == sizeof(uint32_t)); \
_size = (str ? strlen(str)+1 : 0); \
assert(_size == 0 || str != NULL); \
if (_size <= max_len) \
diff --git a/src/common/parse_config.c b/src/common/parse_config.c
index acf7e14..27f756e 100644
--- a/src/common/parse_config.c
+++ b/src/common/parse_config.c
@@ -991,6 +991,44 @@
}
/*
+ * Returns 1 if the line is parsed cleanly, and 0 otherwise.
+ */
+int s_p_parse_pair(s_p_hashtbl_t *hashtbl, const char *key, const char *value)
+{
+ s_p_values_t *p;
+ char *leftover, *v;
+
+ if ((p = _conf_hashtbl_lookup(hashtbl, key)) == NULL) {
+ error("Parsing error at unrecognized key: %s", key);
+ return 0;
+ }
+ /* we have value separated from key here so parse it different way */
+ while (*value != '\0' && isspace(*value))
+ value++; /* skip spaces at start if any */
+ if (*value == '"') { /* quoted value */
+ v = (char *)value + 1;
+ leftover = strchr(v, '"');
+ if (leftover == NULL) {
+ error("Parse error in data for key %s: %s", key, value);
+ return 0;
+ }
+ } else { /* unquoted value */
+ leftover = v = (char *)value;
+ while (*leftover != '\0' && !isspace(*leftover))
+ leftover++;
+ }
+ value = xstrndup(v, leftover - v);
+ if (*leftover != '\0')
+ leftover++;
+ while (*leftover != '\0' && isspace(*leftover))
+ leftover++; /* skip trailing spaces */
+ _handle_keyvalue_match(p, value, leftover, &leftover);
+ xfree(value);
+
+ return 1;
+}
+
+/*
* s_p_get_string
*
* Search for a key in a s_p_hashtbl_t with value of type
diff --git a/src/common/parse_config.h b/src/common/parse_config.h
index d4d6d5a..78d0768 100644
--- a/src/common/parse_config.h
+++ b/src/common/parse_config.h
@@ -191,6 +191,11 @@
/*
* Returns 1 if the line is parsed cleanly, and 0 otherwise.
*/
+int s_p_parse_pair(s_p_hashtbl_t *hashtbl, const char *key, const char *value);
+
+/*
+ * Returns 1 if the line is parsed cleanly, and 0 otherwise.
+ */
int s_p_parse_line(s_p_hashtbl_t *hashtbl, const char *line, char **leftover);
/*
diff --git a/src/common/parse_time.c b/src/common/parse_time.c
index e134c04..3101b41 100644
--- a/src/common/parse_time.c
+++ b/src/common/parse_time.c
@@ -637,16 +637,18 @@
* days-hr:min:sec
* days-hr
* output:
- * minutes (or -2 on error, INFINITE is -1 as defined in slurm.h)
- * if unlimited is the value of string)
+ * minutes for time_str2mins
+ * seconds for time_str2secs
+ * NO_VAL on error
+ * INFINITE for "infinite" or "unlimited"
*/
-extern int time_str2mins(char *string)
+extern int time_str2secs(const char *string)
{
int days = -1, hr = -1, min = -1, sec = -1;
int i, tmp = 0, res = 0;
if ((string == NULL) || (string[0] == '\0'))
- return -1; /* invalid input */
+ return NO_VAL; /* invalid input */
if ((!strcasecmp(string, "-1")) ||
(!strcasecmp(string, "INFINITE")) ||
(!strcasecmp(string, "UNLIMITED"))) {
@@ -658,7 +660,7 @@
tmp = (tmp * 10) + (string[i] - '0');
} else if (string[i] == '-') {
if (days != -1)
- return -2; /* invalid input */
+ return NO_VAL; /* invalid input */
days = tmp;
tmp = 0;
} else if ((string[i] == ':') || (string[i] == '\0')) {
@@ -671,10 +673,10 @@
min = sec;
sec = tmp;
} else
- return -2; /* invalid input */
+ return NO_VAL; /* invalid input */
tmp = 0;
} else
- return -2; /* invalid input */
+ return NO_VAL; /* invalid input */
if (string[i] == '\0')
break;
@@ -695,10 +697,16 @@
min = 0;
if (sec == -1)
sec = 0;
- res = (((days * 24) + hr) * 60) + min;
- res += (sec + 59) / 60; /* round up */
+ res = ((((days * 24) + hr) * 60) + min) * 60 + sec;
return res;
}
+extern int time_str2mins(const char *string)
+{
+ int i = time_str2secs(string);
+ if ((i != INFINITE) && (i != NO_VAL))
+ i = (i + 59) / 60; /* round up */
+ return i;
+}
extern void secs2time_str(time_t time, char *string, int size)
{
@@ -711,14 +719,18 @@
hours = (time / 3600) % 24;
days = time / 86400;
- if (days)
+ if ((days < 0) || (hours < 0) || (minutes < 0) ||
+ (seconds < 0)) {
+ snprintf(string, size, "INVALID");
+ } else if (days) {
snprintf(string, size,
"%ld-%2.2ld:%2.2ld:%2.2ld",
days, hours, minutes, seconds);
- else
+ } else {
snprintf(string, size,
"%2.2ld:%2.2ld:%2.2ld",
hours, minutes, seconds);
+ }
}
}
@@ -732,14 +744,19 @@
minutes = time % 60;
hours = time / 60 % 24;
days = time / 1440;
- if (days)
+
+ if ((days < 0) || (hours < 0) || (minutes < 0) ||
+ (seconds < 0)) {
+ snprintf(string, size, "INVALID");
+ } else if (days) {
snprintf(string, size,
"%ld-%2.2ld:%2.2ld:%2.2ld",
days, hours, minutes, seconds);
- else
+ } else {
snprintf(string, size,
"%2.2ld:%2.2ld:%2.2ld",
hours, minutes, seconds);
+ }
}
}
diff --git a/src/common/parse_time.h b/src/common/parse_time.h
index 5983ebc..e915d00 100644
--- a/src/common/parse_time.h
+++ b/src/common/parse_time.h
@@ -87,9 +87,13 @@
* days-hr:min:sec
* days-hr
* output:
- * minutes
+ * minutes for time_str2mins
+ * seconds for time_str2secs
+ * NO_VAL on error
+ * INFINITE for "infinite" or "unlimited"
*/
-extern int time_str2mins(char *string);
+extern int time_str2mins(const char *string);
+extern int time_str2secs(const char *string);
/* Convert a time value into a string that can be converted back by
* time_str2mins.
diff --git a/src/common/plugstack.c b/src/common/plugstack.c
index e958989..6d306f3 100644
--- a/src/common/plugstack.c
+++ b/src/common/plugstack.c
@@ -70,6 +70,8 @@
struct spank_plugin_operations {
spank_f *init;
+ spank_f *slurmd_init;
+ spank_f *job_prolog;
spank_f *init_post_opt;
spank_f *local_user_init;
spank_f *user_init;
@@ -77,12 +79,16 @@
spank_f *user_task_init;
spank_f *task_post_fork;
spank_f *task_exit;
+ spank_f *job_epilog;
+ spank_f *slurmd_exit;
spank_f *exit;
};
-const int n_spank_syms = 9;
+const int n_spank_syms = 13;
const char *spank_syms[] = {
"slurm_spank_init",
+ "slurm_spank_slurmd_init",
+ "slurm_spank_job_prolog",
"slurm_spank_init_post_opt",
"slurm_spank_local_user_init",
"slurm_spank_user_init",
@@ -90,6 +96,8 @@
"slurm_spank_task_init",
"slurm_spank_task_post_fork",
"slurm_spank_task_exit",
+ "slurm_spank_job_epilog",
+ "slurm_spank_slurmd_exit",
"slurm_spank_exit"
};
@@ -102,6 +110,7 @@
char **argv;
struct spank_plugin_operations ops;
struct spank_option *opts;
+ struct spank_stack *stack;
};
/*
@@ -120,24 +129,15 @@
};
/*
- * Initial value for global optvals for SPANK plugin options
- */
-static int spank_optval = 0xfff;
-
-/*
- * Cache of options provided by spank plugins
- */
-static List option_cache = NULL;
-
-
-/*
- * SPANK plugin context (local, remote, allocator)
+ * SPANK plugin context type (local, remote, allocator)
*/
enum spank_context_type {
S_TYPE_NONE,
S_TYPE_LOCAL, /* LOCAL == srun */
- S_TYPE_REMOTE, /* REMOTE == slurmd */
- S_TYPE_ALLOCATOR /* ALLOCATOR == sbatch/salloc */
+ S_TYPE_REMOTE, /* REMOTE == slurmstepd */
+ S_TYPE_ALLOCATOR, /* ALLOCATOR == sbatch/salloc */
+ S_TYPE_SLURMD, /* SLURMD == slurmd */
+ S_TYPE_JOB_SCRIPT, /* JOB_SCRIPT == prolog/epilog*/
};
/*
@@ -145,6 +145,8 @@
*/
typedef enum step_fn {
SPANK_INIT = 0,
+ SPANK_SLURMD_INIT,
+ SPANK_JOB_PROLOG,
SPANK_INIT_POST_OPT,
LOCAL_USER_INIT,
STEP_USER_INIT,
@@ -152,9 +154,19 @@
STEP_USER_TASK_INIT,
STEP_TASK_POST_FORK,
STEP_TASK_EXIT,
+ SPANK_JOB_EPILOG,
+ SPANK_SLURMD_EXIT,
SPANK_EXIT
} step_fn_t;
+/*
+ * Job information in prolog/epilog context:
+ */
+struct job_script_info {
+ uint32_t jobid;
+ uid_t uid;
+};
+
struct spank_handle {
# define SPANK_MAGIC 0x00a5a500
int magic; /* Magic identifier to ensure validity. */
@@ -162,25 +174,95 @@
step_fn_t phase; /* Which spank fn are we called from? */
void * job; /* Reference to current srun|slurmd job */
slurmd_task_info_t * task; /* Reference to current task (if valid) */
+ struct spank_stack *stack; /* Reference to the current plugin stack*/
};
/*
- * SPANK plugins stack
+ * SPANK stack. The stack of loaded plugins and associated state.
*/
-static List spank_stack = NULL;
-static enum spank_context_type spank_ctx = S_TYPE_NONE;
-
-static pthread_mutex_t spank_mutex = PTHREAD_MUTEX_INITIALIZER;
+struct spank_stack {
+ enum spank_context_type type;/* Type of context for this stack */
+ List plugin_list; /* Stack of spank plugins */
+ List option_cache; /* Cache of plugin options in this ctx */
+ int spank_optval; /* optvalue for next plugin option */
+ const char * plugin_path; /* default path to search for plugins */
+};
/*
- * Default plugin dir
+ * The global spank plugin stack:
*/
-static const char * default_spank_path = NULL;
+static struct spank_stack *global_spank_stack = NULL;
/*
* Forward declarations
*/
static int _spank_plugin_options_cache(struct spank_plugin *p);
+static int _spank_stack_load (struct spank_stack *stack, const char *file);
+static void _spank_plugin_destroy (struct spank_plugin *);
+static void _spank_plugin_opt_destroy (struct spank_plugin_opt *);
+static int spank_stack_get_remote_options(struct spank_stack *, job_options_t);
+static int spank_stack_get_remote_options_env (struct spank_stack *, char **);
+static int spank_stack_set_remote_options_env (struct spank_stack * stack);
+static int dyn_spank_set_job_env (const char *var, const char *val, int ovwt);
+
+static void spank_stack_destroy (struct spank_stack *stack)
+{
+ if (stack->plugin_list)
+ list_destroy (stack->plugin_list);
+ if (stack->option_cache)
+ list_destroy (stack->option_cache);
+ xfree (stack->plugin_path);
+ xfree (stack);
+}
+
+static struct spank_stack *
+spank_stack_create (const char *file, enum spank_context_type type)
+{
+ slurm_ctl_conf_t *conf;
+ struct spank_stack *stack = xmalloc (sizeof (*stack));
+
+ conf = slurm_conf_lock();
+ stack->plugin_path = xstrdup (conf->plugindir);
+ slurm_conf_unlock();
+
+ stack->type = type;
+ stack->spank_optval = 0xfff;
+ stack->plugin_list =
+ list_create ((ListDelF) _spank_plugin_destroy);
+ stack->option_cache =
+ list_create ((ListDelF) _spank_plugin_opt_destroy);
+
+ if (_spank_stack_load (stack, file) < 0) {
+ spank_stack_destroy (stack);
+ return (NULL);
+ }
+
+ return (stack);
+}
+
+static List get_global_option_cache (void)
+{
+ if (global_spank_stack)
+ return (global_spank_stack->option_cache);
+ else
+ return (NULL);
+}
+
+
+static int plugin_in_list (List l, struct spank_plugin *sp)
+{
+ int rc = 0;
+ struct spank_plugin *p;
+ ListIterator i = list_iterator_create (l);
+ while ((p = list_next (i))) {
+ if (p->fq_path == sp->fq_path) {
+ rc = 1;
+ break;
+ }
+ }
+ list_iterator_destroy (i);
+ return (rc);
+}
static void _argv_append(char ***argv, int ac, const char *newarg)
{
@@ -256,7 +338,8 @@
return (0);
}
-static struct spank_plugin *_spank_plugin_create(char *path, int ac,
+static struct spank_plugin *_spank_plugin_create(struct spank_stack *stack,
+ char *path, int ac,
char **av, bool required)
{
struct spank_plugin *plugin;
@@ -283,11 +366,12 @@
plugin->ac = ac;
plugin->argv = av;
plugin->ops = ops;
+ plugin->stack = stack;
/*
* Do not load static plugin options table in allocator context.
*/
- if (spank_ctx != S_TYPE_ALLOCATOR)
+ if (stack->type != S_TYPE_ALLOCATOR)
plugin->opts = plugin_get_sym(p, "spank_options");
return (plugin);
@@ -353,10 +437,40 @@
return (NULL);
}
-static int _spank_conf_include (const char *, int, const char *, List *);
+static int _spank_conf_include (struct spank_stack *,
+ const char *, int, const char *);
static int
-_spank_stack_process_line(const char *file, int line, char *buf, List *stackp)
+spank_stack_plugin_valid_for_context (struct spank_stack *stack,
+ struct spank_plugin *p)
+{
+ switch (stack->type) {
+ case S_TYPE_JOB_SCRIPT:
+ if (p->ops.job_prolog || p->ops.job_epilog)
+ return (1);
+ break;
+ case S_TYPE_SLURMD:
+ if (p->ops.slurmd_init || p->ops.slurmd_exit)
+ return (1);
+ break;
+ case S_TYPE_LOCAL:
+ case S_TYPE_ALLOCATOR:
+ case S_TYPE_REMOTE:
+ /*
+ * For backwards compatibility: All plugins were
+ * always loaded in these contexts, so continue
+ * to do so
+ */
+ return (1);
+ default:
+ return (0);
+ }
+ return (0);
+}
+
+static int
+_spank_stack_process_line(struct spank_stack *stack,
+ const char *file, int line, char *buf)
{
char **argv;
int ac;
@@ -372,7 +486,7 @@
}
if (type == CF_INCLUDE) {
- int rc = _spank_conf_include (file, line, path, stackp);
+ int rc = _spank_conf_include (stack, file, line, path);
xfree (path);
return (rc);
}
@@ -383,14 +497,14 @@
if (path[0] != '/') {
char *f;
- if ((f = _spank_plugin_find (default_spank_path, path))) {
+ if ((f = _spank_plugin_find (stack->plugin_path, path))) {
xfree (path);
path = f;
}
}
required = (type == CF_REQUIRED);
- if (!(p = _spank_plugin_create(path, ac, argv, required))) {
+ if (!(p = _spank_plugin_create(stack, path, ac, argv, required))) {
if (required)
error ("spank: %s:%d:"
" Failed to load plugin %s. Aborting.",
@@ -401,57 +515,64 @@
file, line, path);
return (required ? -1 : 0);
}
- if (*stackp == NULL)
- *stackp = list_create((ListDelF) _spank_plugin_destroy);
- verbose ("spank: %s:%d: Loaded plugin %s",
+ if (plugin_in_list (stack->plugin_list, p)) {
+ error ("spank: %s: cowardly refusing to load a second time",
+ p->fq_path);
+ _spank_plugin_destroy (p);
+ return (0);
+ }
+
+ if (!spank_stack_plugin_valid_for_context (stack, p)) {
+ debug2 ("spank: %s: no callbacks in this context", p->fq_path);
+ _spank_plugin_destroy (p);
+ return (0);
+ }
+
+ debug ("spank: %s:%d: Loaded plugin %s",
file, line, xbasename (p->fq_path));
- list_append (*stackp, p);
+ list_append (stack->plugin_list, p);
_spank_plugin_options_cache(p);
return (0);
}
-static int _spank_stack_create(const char *path, List * listp)
+static int _spank_stack_load(struct spank_stack *stack, const char *path)
{
+ int rc = 0;
int line;
char buf[4096];
FILE *fp;
- verbose("spank: opening plugin stack %s", path);
+ debug ("spank: opening plugin stack %s", path);
+ /*
+ * Try to open plugstack.conf. A missing config file is not an
+ * error, but is equivalent to an empty file.
+ */
if (!(fp = safeopen(path, "r", SAFEOPEN_NOCREATE))) {
if (errno == ENOENT)
- debug("spank: Failed to open %s: %m", path);
- else
- error("spank: Failed to open %s: %m", path);
- return -1;
+ return (0);
+ error("spank: Failed to open %s: %m", path);
+ return (-1);
}
line = 1;
while (fgets(buf, sizeof(buf), fp)) {
- if (_spank_stack_process_line(path, line, buf, listp) < 0)
- goto fail_immediately;
+ rc = _spank_stack_process_line(stack, path, line, buf);
+ if (rc < 0)
+ break;
line++;
}
fclose(fp);
- return (0);
-
- fail_immediately:
- if (*listp != NULL) {
- list_destroy(*listp);
- *listp = NULL;
- }
- fclose(fp);
- return (-1);
+ return (rc);
}
-static int
-_spank_conf_include (const char *file, int lineno, const char *pattern,
- List *stackp)
+static int _spank_conf_include (struct spank_stack *stack,
+ const char *file, int lineno, const char *pattern)
{
int rc = 0;
glob_t gl;
@@ -474,13 +595,13 @@
xfree (dirc);
}
- verbose ("%s: %d: include \"%s\"", file, lineno, pattern);
+ debug ("%s: %d: include \"%s\"", file, lineno, pattern);
rc = glob (pattern, 0, NULL, &gl);
switch (rc) {
case 0:
for (i = 0; i < gl.gl_pathc; i++) {
- rc = _spank_stack_create (gl.gl_pathv[i], stackp);
+ rc = _spank_stack_load (stack, gl.gl_pathv[i]);
if (rc < 0)
break;
}
@@ -504,18 +625,19 @@
}
static int
-_spank_handle_init(struct spank_handle *spank, void * arg,
- int taskid, step_fn_t fn)
+_spank_handle_init(struct spank_handle *spank, struct spank_stack *stack,
+ void * arg, int taskid, step_fn_t fn)
{
memset(spank, 0, sizeof(*spank));
spank->magic = SPANK_MAGIC;
spank->plugin = NULL;
spank->phase = fn;
+ spank->stack = stack;
if (arg != NULL) {
spank->job = arg;
- if (spank_ctx == S_TYPE_REMOTE && taskid >= 0) {
+ if (stack->type == S_TYPE_REMOTE && taskid >= 0) {
spank->task = ((slurmd_job_t *) arg)->task[taskid];
}
}
@@ -527,6 +649,10 @@
switch (type) {
case SPANK_INIT:
return ("init");
+ case SPANK_SLURMD_INIT:
+ return ("slurmd_init");
+ case SPANK_JOB_PROLOG:
+ return ("job_prolog");
case SPANK_INIT_POST_OPT:
return ("init_post_opt");
case LOCAL_USER_INIT:
@@ -541,6 +667,10 @@
return ("task_post_fork");
case STEP_TASK_EXIT:
return ("task_exit");
+ case SPANK_JOB_EPILOG:
+ return ("job_epilog");
+ case SPANK_SLURMD_EXIT:
+ return ("slurmd_exit");
case SPANK_EXIT:
return ("exit");
}
@@ -549,7 +679,44 @@
return ("unknown");
}
-static int _do_call_stack(step_fn_t type, void * job, int taskid)
+static spank_f *spank_plugin_get_fn (struct spank_plugin *sp, step_fn_t type)
+{
+ switch (type) {
+ case SPANK_INIT:
+ return (sp->ops.init);
+ case SPANK_SLURMD_INIT:
+ return (sp->ops.slurmd_init);
+ case SPANK_JOB_PROLOG:
+ return (sp->ops.job_prolog);
+ case SPANK_INIT_POST_OPT:
+ return (sp->ops.init_post_opt);
+ case LOCAL_USER_INIT:
+ return (sp->ops.local_user_init);
+ case STEP_USER_INIT:
+ return (sp->ops.user_init);
+ case STEP_TASK_INIT_PRIV:
+ return (sp->ops.task_init_privileged);
+ case STEP_USER_TASK_INIT:
+ return (sp->ops.user_task_init);
+ case STEP_TASK_POST_FORK:
+ return (sp->ops.task_post_fork);
+ case STEP_TASK_EXIT:
+ return (sp->ops.task_exit);
+ case SPANK_JOB_EPILOG:
+ return (sp->ops.job_epilog);
+ case SPANK_SLURMD_EXIT:
+ return (sp->ops.slurmd_exit);
+ case SPANK_EXIT:
+ return (sp->ops.exit);
+ default:
+ error ("Unhandled spank function type=%d\n", type);
+ return (NULL);
+ }
+ return (NULL);
+}
+
+static int _do_call_stack(struct spank_stack *stack,
+ step_fn_t type, void * job, int taskid)
{
int rc = 0;
ListIterator i;
@@ -557,97 +724,29 @@
struct spank_handle spank[1];
const char *fn_name;
- if (!spank_stack)
- return (0);
+ if (!stack)
+ return (-1);
- if (_spank_handle_init(spank, job, taskid, type) < 0) {
+ if (_spank_handle_init(spank, stack, job, taskid, type) < 0) {
error("spank: Failed to initialize handle for plugins");
return (-1);
}
fn_name = _step_fn_name(type);
- i = list_iterator_create(spank_stack);
+ i = list_iterator_create(stack->plugin_list);
while ((sp = list_next(i))) {
const char *name = xbasename(sp->fq_path);
+ spank_f *spank_fn;
spank->plugin = sp;
- switch (type) {
- case SPANK_INIT:
- if (sp->ops.init) {
- rc = (*sp->ops.init) (spank, sp->ac,
- sp->argv);
- debug2("spank: %s: %s = %d", name,
- fn_name, rc);
- }
- break;
- case SPANK_INIT_POST_OPT:
- if (sp->ops.init_post_opt) {
- rc = (*sp->ops.init_post_opt) (spank, sp->ac,
- sp->argv);
- debug2("spank: %s: %s = %d", name,
- fn_name, rc);
- }
- break;
- case LOCAL_USER_INIT:
- if (sp->ops.local_user_init) {
- rc = (*sp->ops.local_user_init) (spank, sp->ac,
- sp->argv);
- debug2("spank: %s: %s = %d", name, fn_name, rc);
- }
- break;
- case STEP_USER_INIT:
- if (sp->ops.user_init) {
- rc = (*sp->ops.user_init) (spank, sp->ac,
- sp->argv);
- debug2("spank: %s: %s = %d", name,
- fn_name, rc);
- }
- break;
- case STEP_TASK_INIT_PRIV:
- if (sp->ops.task_init_privileged) {
- rc = (*sp->ops.task_init_privileged)
- (spank, sp->ac, sp->argv);
- debug2("spank: %s: %s = %d", name,
- fn_name, rc);
- }
- break;
- case STEP_USER_TASK_INIT:
- if (sp->ops.user_task_init) {
- rc = (*sp->ops.user_task_init) (spank,
- sp->ac,
- sp->argv);
- debug2("spank: %s: %s = %d", name,
- fn_name, rc);
- }
- break;
- case STEP_TASK_POST_FORK:
- if (sp->ops.task_post_fork) {
- rc = (*sp->ops.task_post_fork) (spank,
- sp->ac,
- sp->argv);
- debug2("spank: %s: %s = %d", name,
- fn_name, rc);
- }
- break;
- case STEP_TASK_EXIT:
- if (sp->ops.task_exit) {
- rc = (*sp->ops.task_exit) (spank, sp->ac,
- sp->argv);
- debug2("spank: %s: %s = %d", name, fn_name,
- rc);
- }
- break;
- case SPANK_EXIT:
- if (sp->ops.exit) {
- rc = (*sp->ops.exit) (spank, sp->ac,
- sp->argv);
- debug2("spank: %s: %s = %d", name,
- fn_name, rc);
- }
- break;
- }
+ spank_fn = spank_plugin_get_fn (sp, type);
+ if (!spank_fn)
+ continue;
+
+ rc = (*spank_fn) (spank, sp->ac, sp->argv);
+ debug2("spank: %s: %s = %d", name, fn_name, rc);
if ((rc < 0) && sp->required) {
error("spank: required plugin %s: "
@@ -662,52 +761,32 @@
return (rc);
}
-int _spank_init(enum spank_context_type context, slurmd_job_t * job)
+struct spank_stack *spank_stack_init(enum spank_context_type context)
{
slurm_ctl_conf_t *conf = slurm_conf_lock();
const char *path = conf->plugstack;
- default_spank_path = conf->plugindir;
slurm_conf_unlock();
- spank_ctx = context;
+ return spank_stack_create (path, context);
+}
- /*
- * A nonexistent spank config is not an error, but
- * abort on any other access failures
- */
- if (access (path, R_OK) < 0) {
- if (errno == ENOENT)
- return (0);
- error ("spank: Unable to open config file `%s': %m", path);
+int _spank_init(enum spank_context_type context, slurmd_job_t * job)
+{
+ struct spank_stack *stack;
+
+ if (!(stack = spank_stack_init (context)))
return (-1);
- }
+ global_spank_stack = stack;
- if (_spank_stack_create(path, &spank_stack) < 0) {
- error("spank: failed to create plugin stack");
- return (-1);
- }
+ return (_do_call_stack(stack, SPANK_INIT, job, -1));
+}
- if (_do_call_stack(SPANK_INIT, job, -1) < 0)
- return (-1);
-
- /*
- * Nothing more to do unless we are in remote context:
- */
- if (spank_ctx != S_TYPE_REMOTE)
- return (0);
-
- /*
- * Remote-specific code:
- */
- if (!job) {
- error("spank: spank_init called without job reference!");
- return (-1);
- }
-
+static int spank_stack_post_opt (struct spank_stack * stack, slurmd_job_t *job)
+{
/*
* Get any remote options from job launch message:
*/
- if (spank_get_remote_options(job->options) < 0) {
+ if (spank_stack_get_remote_options(stack, job->options) < 0) {
error("spank: Unable to get remote options");
return (-1);
}
@@ -715,22 +794,39 @@
/*
* Get any remote option passed thru environment
*/
- if (spank_get_remote_options_env(job->env) < 0) {
+ if (spank_stack_get_remote_options_env(stack, job->env) < 0) {
error("spank: Unable to get remote options from environment");
return (-1);
}
/*
+ * Now clear any remaining options passed through environment
+ */
+ spank_clear_remote_options_env (job->env);
+
+ /*
* Now that all options have been processed, we can
* call the post_opt handlers here in remote context.
*/
- return (_do_call_stack(SPANK_INIT_POST_OPT, job, -1) < 0);
+ return (_do_call_stack(stack, SPANK_INIT_POST_OPT, job, -1) < 0);
+
+}
+
+static int spank_init_remote (slurmd_job_t *job)
+{
+ if (_spank_init (S_TYPE_REMOTE, job) < 0)
+ return (-1);
+
+ /*
+ * _spank_init initializes global_spank_stack
+ */
+ return (spank_stack_post_opt (global_spank_stack, job));
}
int spank_init (slurmd_job_t * job)
{
if (job)
- return _spank_init (S_TYPE_REMOTE, job);
+ return spank_init_remote (job);
else
return _spank_init (S_TYPE_LOCAL, NULL);
}
@@ -740,70 +836,112 @@
return _spank_init (S_TYPE_ALLOCATOR, NULL);
}
+int spank_slurmd_init (void)
+{
+ return _spank_init (S_TYPE_SLURMD, NULL);
+}
+
int spank_init_post_opt (void)
{
- /*
- * In allocator context, set remote options in env here.
- */
- if (spank_ctx == S_TYPE_ALLOCATOR)
- spank_set_remote_options_env();
+ struct spank_stack *stack = global_spank_stack;
- return (_do_call_stack(SPANK_INIT_POST_OPT, NULL, -1));
+ /*
+ * Set remote options in our environment and the
+ * spank_job_env so that we can always pull them out
+ * on the remote side and/or job prolog epilog.
+ */
+ spank_stack_set_remote_options_env (stack);
+
+ return (_do_call_stack(stack, SPANK_INIT_POST_OPT, NULL, -1));
}
int spank_user(slurmd_job_t * job)
{
- return (_do_call_stack(STEP_USER_INIT, job, -1));
+ return (_do_call_stack(global_spank_stack, STEP_USER_INIT, job, -1));
}
int spank_local_user(struct spank_launcher_job_info *job)
{
- return (_do_call_stack(LOCAL_USER_INIT, job, -1));
+ return (_do_call_stack(global_spank_stack, LOCAL_USER_INIT, job, -1));
}
int spank_task_privileged(slurmd_job_t *job, int taskid)
{
- return (_do_call_stack(STEP_TASK_INIT_PRIV, job, taskid));
+ return (_do_call_stack(global_spank_stack, STEP_TASK_INIT_PRIV, job, taskid));
}
int spank_user_task(slurmd_job_t * job, int taskid)
{
- return (_do_call_stack(STEP_USER_TASK_INIT, job, taskid));
+ return (_do_call_stack(global_spank_stack, STEP_USER_TASK_INIT, job, taskid));
}
int spank_task_post_fork(slurmd_job_t * job, int taskid)
{
- return (_do_call_stack(STEP_TASK_POST_FORK, job, taskid));
+ return (_do_call_stack(global_spank_stack, STEP_TASK_POST_FORK, job, taskid));
}
int spank_task_exit(slurmd_job_t * job, int taskid)
{
- return (_do_call_stack(STEP_TASK_EXIT, job, taskid));
+ return (_do_call_stack(global_spank_stack, STEP_TASK_EXIT, job, taskid));
+}
+
+int spank_slurmd_exit (void)
+{
+ int rc;
+ rc = _do_call_stack (global_spank_stack, SPANK_SLURMD_EXIT, NULL, 0);
+ spank_stack_destroy (global_spank_stack);
+ global_spank_stack = NULL;
+ return (rc);
}
int spank_fini(slurmd_job_t * job)
{
- int rc = _do_call_stack(SPANK_EXIT, job, -1);
+ int rc = _do_call_stack(global_spank_stack, SPANK_EXIT, job, -1);
- if (option_cache)
- list_destroy(option_cache);
- if (spank_stack)
- list_destroy(spank_stack);
+ spank_stack_destroy (global_spank_stack);
+ global_spank_stack = NULL;
return (rc);
}
/*
+ * Run job_epilog or job_prolog callbacks in a private spank context.
+ */
+static int spank_job_script (step_fn_t fn, uint32_t jobid, uid_t uid)
+{
+ int rc = 0;
+ struct spank_stack *stack;
+ struct job_script_info jobinfo = { jobid, uid };
+
+ stack = spank_stack_init (S_TYPE_JOB_SCRIPT);
+ if (!stack)
+ return (-1);
+ global_spank_stack = stack;
+
+ rc = _do_call_stack (stack, fn, &jobinfo, -1);
+
+ spank_stack_destroy (stack);
+ global_spank_stack = NULL;
+ return (rc);
+}
+
+int spank_job_prolog (uint32_t jobid, uid_t uid)
+{
+ return spank_job_script (SPANK_JOB_PROLOG, jobid, uid);
+}
+
+int spank_job_epilog (uint32_t jobid, uid_t uid)
+{
+ return spank_job_script (SPANK_JOB_EPILOG, jobid, uid);
+}
+
+/*
* SPANK options functions
*/
-static int _spank_next_option_val(void)
+static int _spank_next_option_val(struct spank_stack *stack)
{
- int optval;
- slurm_mutex_lock(&spank_mutex);
- optval = spank_optval++;
- slurm_mutex_unlock(&spank_mutex);
- return (optval);
+ return (stack->spank_optval++);
}
static struct spank_option * _spank_option_copy(struct spank_option *opt)
@@ -842,7 +980,7 @@
struct spank_plugin_opt *spopt = xmalloc(sizeof(*spopt));
spopt->opt = _spank_option_copy (opt);
spopt->plugin = p;
- spopt->optval = _spank_next_option_val();
+ spopt->optval = _spank_next_option_val(p->stack);
spopt->found = 0;
spopt->optarg = NULL;
@@ -873,11 +1011,15 @@
{
int disabled = 0;
struct spank_plugin_opt *spopt;
+ struct spank_stack *stack;
+ List option_cache;
- if (!option_cache) {
- option_cache =
- list_create((ListDelF) _spank_plugin_opt_destroy);
+ stack = p->stack;
+ if (stack == NULL) {
+ error ("spank: %s: can't determine plugin context", p->name);
+ return (ESPANK_BAD_ARG);
}
+ option_cache = stack->option_cache;
spopt = list_find_first(option_cache,
(ListFindF) _opt_by_name, opt->name);
@@ -900,7 +1042,7 @@
return (ESPANK_NOSPACE);
}
- verbose("SPANK: appending plugin option \"%s\"", opt->name);
+ debug ("SPANK: appending plugin option \"%s\"", opt->name);
list_append(option_cache, _spank_plugin_opt_create(p, opt, disabled));
return (ESPANK_SUCCESS);
@@ -967,6 +1109,10 @@
struct option *opts = NULL;
ListIterator i = NULL;
+ List option_cache = get_global_option_cache();
+ if (option_cache == NULL)
+ return (NULL);
+
opts = optz_create();
/*
@@ -1000,6 +1146,7 @@
{
struct spank_plugin_opt *opt;
int rc = 0;
+ List option_cache = get_global_option_cache();
if (option_cache == NULL || (list_count(option_cache) == 0))
return (-1);
@@ -1161,6 +1308,7 @@
{
struct spank_plugin_opt *p;
ListIterator i;
+ List option_cache = get_global_option_cache();
if ((option_cache == NULL) || (list_count(option_cache) == 0))
return (0);
@@ -1232,13 +1380,21 @@
if (setenv (var, option->optarg, 1) < 0)
error ("failed to set %s=%s in env", var, option->optarg);
+ if (dyn_spank_set_job_env (var, option->optarg, 1) < 0)
+ error ("failed to set %s=%s in env", var, option->optarg);
+
return (0);
}
-int spank_set_remote_options_env(void)
+static int spank_stack_set_remote_options_env (struct spank_stack *stack)
{
struct spank_plugin_opt *p;
ListIterator i;
+ List option_cache;
+
+ if (stack == NULL)
+ return (0);
+ option_cache = stack->option_cache;
if ((option_cache == NULL) || (list_count(option_cache) == 0))
return (0);
@@ -1256,6 +1412,11 @@
{
struct spank_plugin_opt *p;
ListIterator i;
+ List option_cache;
+
+ if (global_spank_stack == NULL)
+ return (0);
+ option_cache = global_spank_stack->option_cache;
if ((option_cache == NULL) || (list_count(option_cache) == 0))
return (0);
@@ -1292,12 +1453,14 @@
return (1);
}
-static struct spank_plugin_opt *_find_remote_option_by_name(const char *str)
+static struct spank_plugin_opt *
+spank_stack_find_option_by_name(struct spank_stack *stack, const char *str)
{
struct spank_plugin_opt *opt = NULL;
struct opt_find_args args;
char buf[256];
char *name;
+ List option_cache = stack->option_cache;
if (strlcpy(buf, str, sizeof(buf)) >= sizeof(buf)) {
error("plugin option \"%s\" too big. Ignoring.", str);
@@ -1332,12 +1495,89 @@
return (opt);
}
+spank_err_t
+spank_option_getopt (spank_t sp, struct spank_option *opt, char **argp)
+{
+ const char *val;
+ char var[1024];
+ List option_cache;
+ struct spank_plugin_opt *spopt;
+
+ if (argp)
+ *argp = NULL;
+
+ if (!sp->plugin) {
+ error ("spank_option_getopt: Not called from a plugin!?");
+ return (ESPANK_NOT_AVAIL);
+ }
+
+ if (sp->phase == SPANK_INIT)
+ return (ESPANK_NOT_AVAIL);
+
+ if (!opt || !opt->name)
+ return (ESPANK_BAD_ARG);
+
+ if (opt->has_arg && !argp)
+ return (ESPANK_BAD_ARG);
+
+ /*
+ * First check the cache:
+ */
+ option_cache = sp->stack->option_cache;
+ spopt = list_find_first (option_cache,
+ (ListFindF) _opt_by_name,
+ opt->name);
+ if (spopt) {
+ if (opt->has_arg && argp)
+ *argp = spopt->optarg;
+ return (ESPANK_SUCCESS);
+ }
+
+ /*
+ * Otherwise, check current environment:
+ *
+ * We need to check for variables that start with either
+ * the default spank option env prefix, or the default
+ * prefix + an *extra* prefix of SPANK_, in case we're
+ * running in prolog/epilog, where SLURM prepends SPANK_
+ * to all spank job environment variables.
+ */
+ spopt = _spank_plugin_opt_create (sp->plugin, opt, 0);
+ memcpy (var, "SPANK_", 6);
+ if ((val = getenv (_opt_env_name(spopt, var+6, sizeof (var) - 6))) ||
+ (val = getenv (var))) {
+ spopt->optarg = xstrdup (val);
+ spopt->found = 1;
+ if (opt->has_arg && argp)
+ *argp = spopt->optarg;
+ }
+
+ /*
+ * Cache the result
+ */
+ list_append (option_cache, spopt);
+
+ if (!spopt->found)
+ return (ESPANK_ERROR);
+
+ return (ESPANK_SUCCESS);
+}
+
+
int spank_get_remote_options_env (char **env)
{
+ return spank_stack_get_remote_options_env (global_spank_stack, env);
+}
+
+
+static int
+spank_stack_get_remote_options_env (struct spank_stack *stack, char **env)
+{
char var [1024];
const char *arg;
struct spank_plugin_opt *option;
ListIterator i;
+ List option_cache = stack->option_cache;
if (!option_cache)
return (0);
@@ -1368,6 +1608,12 @@
int spank_get_remote_options(job_options_t opts)
{
+ return spank_stack_get_remote_options (global_spank_stack, opts);
+}
+
+static int
+spank_stack_get_remote_options(struct spank_stack *stack, job_options_t opts)
+{
const struct job_option_info *j;
job_options_iterator_reset(opts);
@@ -1378,7 +1624,7 @@
if (j->type != OPT_TYPE_SPANK)
continue;
- if (!(opt = _find_remote_option_by_name(j->option)))
+ if (!(opt = spank_stack_find_option_by_name(stack, j->option)))
continue;
p = opt->opt;
@@ -1392,6 +1638,36 @@
return (0);
}
+/*
+ * Clear any environment variables for spank options.
+ * spank option env vars have a prefix of SPANK_OPTION_ENV_PREFIX,
+ * or SPANK_ + SPANK_OPTION_ENV_PREFIX
+ */
+int spank_clear_remote_options_env (char **env)
+{
+ char **ep;
+ int len = strlen (SPANK_OPTION_ENV_PREFIX);
+
+ for (ep = env; *ep; ep++) {
+ char *p = *ep;
+ if (strncmp (*ep, "SPANK_", 6) == 0)
+ p = *ep+6;
+ if (strncmp (p, SPANK_OPTION_ENV_PREFIX, len) == 0) {
+ char *end = strchr (p+len, '=');
+ if (end) {
+ char name[1024];
+ memcpy (name, *ep, end - *ep);
+ name [end - *ep] = '\0';
+ debug ("unsetenv (%s)\n", name);
+ unsetenvp (env, name);
+ }
+ }
+ }
+ return (0);
+}
+
+
+
static int tasks_execd (spank_t spank)
{
return ( (spank->phase == STEP_TASK_POST_FORK)
@@ -1450,7 +1726,7 @@
}
}
-static spank_err_t _check_spank_item_validity (spank_item_t item, void *job)
+static spank_err_t _check_spank_item_validity (spank_t spank, spank_item_t item)
{
/*
* Valid in all contexts:
@@ -1465,15 +1741,24 @@
break; /* fallthru */
}
- if (spank_ctx == S_TYPE_LOCAL) {
- if (!_valid_in_local_context (item))
- return ESPANK_NOT_REMOTE;
- else if (job == NULL)
+ /*
+ * No spank_item_t is available in slurmd context at this time.
+ */
+ if (spank->stack->type == S_TYPE_SLURMD)
+ return ESPANK_NOT_AVAIL;
+ else if (spank->stack->type == S_TYPE_JOB_SCRIPT) {
+ if (item != S_JOB_UID && item != S_JOB_ID)
return ESPANK_NOT_AVAIL;
}
- else if (spank_ctx == S_TYPE_ALLOCATOR) {
+ else if (spank->stack->type == S_TYPE_LOCAL) {
+ if (!_valid_in_local_context (item))
+ return ESPANK_NOT_REMOTE;
+ else if (spank->job == NULL)
+ return ESPANK_NOT_AVAIL;
+ }
+ else if (spank->stack->type == S_TYPE_ALLOCATOR) {
if (_valid_in_allocator_context (item)) {
- if (job)
+ if (spank->job)
return ESPANK_SUCCESS;
else
return ESPANK_NOT_AVAIL;
@@ -1543,7 +1828,7 @@
{
if ((spank == NULL) || (spank->magic != SPANK_MAGIC))
return (-1);
- if (spank_ctx == S_TYPE_REMOTE)
+ if (spank->stack->type == S_TYPE_REMOTE)
return (1);
else
return (0);
@@ -1551,13 +1836,19 @@
spank_context_t spank_context (void)
{
- switch (spank_ctx) {
+ if (global_spank_stack == NULL)
+ return S_CTX_ERROR;
+ switch (global_spank_stack->type) {
case S_TYPE_REMOTE:
return S_CTX_REMOTE;
case S_TYPE_LOCAL:
return S_CTX_LOCAL;
case S_TYPE_ALLOCATOR:
return S_CTX_ALLOCATOR;
+ case S_TYPE_SLURMD:
+ return S_CTX_SLURMD;
+ case S_TYPE_JOB_SCRIPT:
+ return S_CTX_JOB_SCRIPT;
default:
return S_CTX_ERROR;
}
@@ -1582,6 +1873,7 @@
slurmd_task_info_t *task;
slurmd_job_t *slurmd_job = NULL;
struct spank_launcher_job_info *launcher_job = NULL;
+ struct job_script_info *s_job_info = NULL;
va_list vargs;
spank_err_t rc = ESPANK_SUCCESS;
@@ -1591,31 +1883,35 @@
/*
* Check for validity of the given item in the current context
*/
- rc = _check_spank_item_validity (item, spank->job);
+ rc = _check_spank_item_validity (spank, item);
if (rc != ESPANK_SUCCESS)
return (rc);
- if (spank_ctx == S_TYPE_LOCAL)
+ if (spank->stack->type == S_TYPE_LOCAL)
launcher_job = spank->job;
- else if (spank_ctx == S_TYPE_REMOTE)
+ else if (spank->stack->type == S_TYPE_REMOTE)
slurmd_job = spank->job;
+ else if (spank->stack->type == S_TYPE_JOB_SCRIPT)
+ s_job_info = spank->job;
va_start(vargs, item);
switch (item) {
case S_JOB_UID:
p2uid = va_arg(vargs, uid_t *);
- if (spank_ctx == S_TYPE_LOCAL)
+ if (spank->stack->type == S_TYPE_LOCAL)
*p2uid = launcher_job->uid;
- else if (spank_ctx == S_TYPE_REMOTE)
+ else if (spank->stack->type == S_TYPE_REMOTE)
*p2uid = slurmd_job->uid;
+ else if (spank->stack->type == S_TYPE_JOB_SCRIPT)
+ *p2uid = s_job_info->uid;
else
*p2uid = getuid();
break;
case S_JOB_GID:
p2gid = va_arg(vargs, gid_t *);
- if (spank_ctx == S_TYPE_LOCAL)
+ if (spank->stack->type == S_TYPE_LOCAL)
*p2gid = launcher_job->gid;
- else if (spank_ctx == S_TYPE_REMOTE)
+ else if (spank->stack->type == S_TYPE_REMOTE)
*p2gid = slurmd_job->gid;
else
*p2gid = getgid();
@@ -1628,21 +1924,23 @@
break;
case S_JOB_ID:
p2uint32 = va_arg(vargs, uint32_t *);
- if (spank_ctx == S_TYPE_LOCAL)
+ if (spank->stack->type == S_TYPE_LOCAL)
*p2uint32 = launcher_job->jobid;
- else
+ else if (spank->stack->type == S_TYPE_REMOTE)
*p2uint32 = slurmd_job->jobid;
+ else if (spank->stack->type == S_TYPE_JOB_SCRIPT)
+ *p2uint32 = s_job_info->jobid;
break;
case S_JOB_STEPID:
p2uint32 = va_arg(vargs, uint32_t *);
- if (spank_ctx == S_TYPE_LOCAL)
+ if (spank->stack->type == S_TYPE_LOCAL)
*p2uint32 = launcher_job->stepid;
else
*p2uint32 = slurmd_job->stepid;
break;
case S_JOB_NNODES:
p2uint32 = va_arg(vargs, uint32_t *);
- if (spank_ctx == S_TYPE_LOCAL) {
+ if (spank->stack->type == S_TYPE_LOCAL) {
if (launcher_job->step_layout)
*p2uint32 = launcher_job->step_layout->
node_cnt;
@@ -1663,7 +1961,7 @@
break;
case S_JOB_TOTAL_TASK_COUNT:
p2uint32 = va_arg(vargs, uint32_t *);
- if (spank_ctx == S_TYPE_LOCAL) {
+ if (spank->stack->type == S_TYPE_LOCAL) {
if (launcher_job->step_layout)
*p2uint32 = launcher_job->step_layout->
task_cnt;
@@ -1685,7 +1983,7 @@
case S_JOB_ARGV:
p2int = va_arg(vargs, int *);
p2argv = va_arg(vargs, char ***);
- if (spank_ctx == S_TYPE_LOCAL) {
+ if (spank->stack->type == S_TYPE_LOCAL) {
*p2int = launcher_job->argc;
*p2argv = launcher_job->argv;
} else {
@@ -1811,19 +2109,26 @@
return (rc);
}
+spank_err_t spank_env_access_check (spank_t spank)
+{
+ if ((spank == NULL) || (spank->magic != SPANK_MAGIC))
+ return (ESPANK_BAD_ARG);
+ if (spank->stack->type != S_TYPE_REMOTE)
+ return (ESPANK_NOT_REMOTE);
+ if (spank->job == NULL)
+ return (ESPANK_BAD_ARG);
+ return (ESPANK_SUCCESS);
+
+}
+
spank_err_t spank_getenv(spank_t spank, const char *var, char *buf,
int len)
{
char *val;
+ spank_err_t err = spank_env_access_check (spank);
- if ((spank == NULL) || (spank->magic != SPANK_MAGIC))
- return (ESPANK_BAD_ARG);
-
- if (spank_ctx != S_TYPE_REMOTE)
- return (ESPANK_NOT_REMOTE);
-
- if (spank->job == NULL)
- return (ESPANK_BAD_ARG);
+ if (err != ESPANK_SUCCESS)
+ return (err);
if (len < 0)
return (ESPANK_BAD_ARG);
@@ -1841,15 +2146,10 @@
int overwrite)
{
slurmd_job_t * job;
+ spank_err_t err = spank_env_access_check (spank);
- if ((spank == NULL) || (spank->magic != SPANK_MAGIC))
- return (ESPANK_BAD_ARG);
-
- if (spank_ctx != S_TYPE_REMOTE)
- return (ESPANK_NOT_REMOTE);
-
- if (spank->job == NULL)
- return (ESPANK_BAD_ARG);
+ if (err != ESPANK_SUCCESS)
+ return (err);
if ((var == NULL) || (val == NULL))
return (ESPANK_BAD_ARG);
@@ -1867,14 +2167,10 @@
spank_err_t spank_unsetenv (spank_t spank, const char *var)
{
- if ((spank == NULL) || (spank->magic != SPANK_MAGIC))
- return (ESPANK_BAD_ARG);
+ spank_err_t err = spank_env_access_check (spank);
- if (spank_ctx != S_TYPE_REMOTE)
- return (ESPANK_NOT_REMOTE);
-
- if (spank->job == NULL)
- return (ESPANK_BAD_ARG);
+ if (err != ESPANK_SUCCESS)
+ return (err);
if (var == NULL)
return (ESPANK_BAD_ARG);
@@ -1924,20 +2220,33 @@
return ((*fn) (n));
}
+static spank_err_t spank_job_control_access_check (spank_t spank)
+{
+ if ((spank == NULL) || (spank->magic != SPANK_MAGIC))
+ return (ESPANK_BAD_ARG);
+
+ if (spank_remote (spank))
+ return (ESPANK_NOT_LOCAL);
+
+ if (spank->stack->type == S_TYPE_SLURMD)
+ return (ESPANK_NOT_AVAIL);
+
+ return (ESPANK_SUCCESS);
+}
+
spank_err_t spank_job_control_getenv (spank_t spank, const char *var,
char *buf, int len)
{
const char *val;
- if ((spank == NULL) || (spank->magic != SPANK_MAGIC))
- return (ESPANK_BAD_ARG);
+ spank_err_t err;
+
+ if ((err = spank_job_control_access_check (spank)))
+ return (err);
if ((var == NULL) || (buf == NULL) || (len <= 0))
return (ESPANK_BAD_ARG);
- if (spank_remote (spank))
- return (ESPANK_NOT_LOCAL);
-
val = dyn_spank_get_job_env (var);
if (val == NULL)
return (ESPANK_ENV_NOEXIST);
@@ -1951,15 +2260,14 @@
spank_err_t spank_job_control_setenv (spank_t spank, const char *var,
const char *val, int overwrite)
{
- if ((spank == NULL) || (spank->magic != SPANK_MAGIC))
- return (ESPANK_BAD_ARG);
+ spank_err_t err;
+
+ if ((err = spank_job_control_access_check (spank)))
+ return (err);
if ((var == NULL) || (val == NULL))
return (ESPANK_BAD_ARG);
- if (spank_remote (spank))
- return (ESPANK_NOT_LOCAL);
-
if (dyn_spank_set_job_env (var, val, overwrite) < 0)
return (ESPANK_BAD_ARG);
@@ -1968,15 +2276,14 @@
spank_err_t spank_job_control_unsetenv (spank_t spank, const char *var)
{
- if ((spank == NULL) || (spank->magic != SPANK_MAGIC))
- return (ESPANK_BAD_ARG);
+ spank_err_t err;
+
+ if ((err = spank_job_control_access_check (spank)))
+ return (err);
if (var == NULL)
return (ESPANK_BAD_ARG);
- if (spank_remote (spank))
- return (ESPANK_NOT_LOCAL);
-
if (dyn_spank_unset_job_env (var) < 0)
return (ESPANK_BAD_ARG);
diff --git a/src/common/plugstack.h b/src/common/plugstack.h
index 5506213..2e480ab 100644
--- a/src/common/plugstack.h
+++ b/src/common/plugstack.h
@@ -36,6 +36,7 @@
\*****************************************************************************/
#ifndef _PLUGSTACK_H
+#define _PLUGSTACK_H
#if HAVE_CONFIG_H
# include <config.h>
@@ -66,6 +67,10 @@
int spank_init (slurmd_job_t *job);
+int spank_slurmd_init (void);
+
+int spank_job_prolog (uint32_t jobid, uid_t uid);
+
int spank_init_allocator (void);
int spank_init_post_opt (void);
@@ -82,6 +87,10 @@
int spank_task_exit (slurmd_job_t *job, int taskid);
+int spank_job_epilog (uint32_t jobid, uid_t uid);
+
+int spank_slurmd_exit (void);
+
int spank_fini (slurmd_job_t *job);
/*
@@ -131,12 +140,6 @@
*/
int spank_set_remote_options (job_options_t options);
-/* Set all registered remote options (i.e. those passed to
- * spank_process_option) in the current environment for later
- * retreival by spank_get_remote_options_env().
- */
-int spank_set_remote_options_env (void);
-
/* Register any remote spank options that exist in `options'
* to their respective spank plugins. This function ends up invoking
* all plugin option callbacks, and will fail (return < 0) if
@@ -157,4 +160,7 @@
*/
int spank_get_remote_options_env (char **env);
+/* Clear any spank remote options encoded in environment.
+ */
+int spank_clear_remote_options_env (char **env);
#endif /* !_PLUGSTACK_H */
diff --git a/src/common/proc_args.c b/src/common/proc_args.c
index b3f999f..9213fff 100644
--- a/src/common/proc_args.c
+++ b/src/common/proc_args.c
@@ -46,6 +46,10 @@
# include <strings.h>
#endif
+#ifndef __USE_ISOC99
+#define __USE_ISOC99
+#endif
+
#ifdef HAVE_LIMITS_H
# include <limits.h>
#endif
@@ -61,7 +65,7 @@
#include <fcntl.h>
#include <stdarg.h> /* va_start */
#include <stdio.h>
-#include <stdlib.h> /* getenv */
+#include <stdlib.h> /* getenv, strtoll */
#include <pwd.h> /* getpwuid */
#include <ctype.h> /* isdigit */
#include <sys/param.h> /* MAXPATHLEN */
@@ -360,8 +364,13 @@
return false;
}
xfree(min_str);
- if (*min_nodes == 0)
+#ifdef HAVE_CRAY
+ if (*min_nodes < 0) {
+#else
+ if (*min_nodes == 0) {
+#endif
*min_nodes = 1;
+ }
max_str = xstrndup(ptr+1, strlen(arg)-((ptr+1)-arg));
*max_nodes = _str_to_nodes(max_str, &leftover);
@@ -377,7 +386,11 @@
error("\"%s\" is not a valid node count", arg);
return false;
}
+#ifdef HAVE_CRAY
+ if (*min_nodes < 0) {
+#else
if (*min_nodes == 0) {
+#endif
/* whitespace does not a valid node count make */
error("\"%s\" is not a valid node count", arg);
return false;
@@ -689,7 +702,7 @@
_create_path_list(void)
{
List l = list_create(_freeF);
- char *path = xstrdup(getenv("PATH"));
+ char *path;
char *c, *lc;
c = getenv("PATH");
@@ -871,3 +884,68 @@
return sig;
}
+
+
+/*
+ * parse_uint32 - Convert ascii string to a 32 bit unsigned int.
+ * IN aval - ascii string.
+ * IN/OUT ival - 32 bit pointer.
+ * RET 0 if no error, 1 otherwise.
+ */
+extern int parse_uint32(char *aval, uint32_t *ival)
+{
+ /*
+ * First, convert the ascii value to a
+ * long long int. If the result is greater
+ * than or equal to 0 and less than NO_VAL
+ * set the value and return. Otherwise return
+ * an error.
+ */
+ uint32_t max32uint = (uint32_t) NO_VAL;
+ long long tval;
+ char *p;
+
+ /*
+ * Return error for invalid value.
+ */
+ tval = strtoll(aval, &p, 10);
+ if (p[0] || (tval == LLONG_MIN) || (tval == LLONG_MAX) ||
+ (tval < 0) || (tval >= max32uint))
+ return 1;
+
+ *ival = (uint32_t) tval;
+
+ return 0;
+}
+
+/*
+ * parse_uint16 - Convert ascii string to a 16 bit unsigned int.
+ * IN aval - ascii string.
+ * IN/OUT ival - 16 bit pointer.
+ * RET 0 if no error, 1 otherwise.
+ */
+extern int parse_uint16(char *aval, uint16_t *ival)
+{
+ /*
+ * First, convert the ascii value to a
+ * long long int. If the result is greater than
+ * or equal to 0 and less than (uint16_t) NO_VAL
+ * set the value and return. Otherwise
+ * return an error.
+ */
+ uint16_t max16uint = (uint16_t) NO_VAL;
+ long long tval;
+ char *p;
+
+ /*
+ * Return error for invalid value.
+ */
+ tval = strtoll(aval, &p, 10);
+ if (p[0] || (tval == LLONG_MIN) || (tval == LLONG_MAX) ||
+ (tval < 0) || (tval >= max16uint))
+ return 1;
+
+ *ival = (uint16_t) tval;
+
+ return 0;
+}
diff --git a/src/common/proc_args.h b/src/common/proc_args.h
index c23195b..f342eb1 100644
--- a/src/common/proc_args.h
+++ b/src/common/proc_args.h
@@ -145,4 +145,20 @@
* Return 0 on failure */
int sig_name2num(char *signal_name);
+/*
+ * parse_uint16 - Convert ascii string to a 16 bit unsigned int.
+ * IN aval - ascii string.
+ * IN/OUT ival - 16 bit pointer.
+ * RET 0 if no error, 1 otherwise.
+ */
+extern int parse_uint16(char *aval, uint16_t *ival);
+
+/*
+ * parse_uint32 - Convert ascii string to a 32 bit unsigned int.
+ * IN aval - ascii string.
+ * IN/OUT ival - 32 bit pointer.
+ * RET 0 if no error, 1 otherwise.
+ */
+extern int parse_uint32(char *aval, uint32_t *ival);
+
#endif /* !_PROC_ARGS_H */
diff --git a/src/common/read_config.c b/src/common/read_config.c
index aacd6ff..174b8cc 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -68,6 +68,7 @@
#include "src/common/parse_spec.h"
#include "src/common/parse_time.h"
#include "src/common/read_config.h"
+#include "src/common/slurm_accounting_storage.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/slurm_protocol_defs.h"
#include "src/common/slurm_rlimits_info.h"
@@ -226,6 +227,7 @@
{"PriorityMaxAge", S_P_STRING},
{"PriorityUsageResetPeriod", S_P_STRING},
{"PriorityType", S_P_STRING},
+ {"PriorityFlags", S_P_STRING},
{"PriorityWeightAge", S_P_UINT32},
{"PriorityWeightFairshare", S_P_UINT32},
{"PriorityWeightJobSize", S_P_UINT32},
@@ -238,6 +240,8 @@
{"PropagatePrioProcess", S_P_UINT16},
{"PropagateResourceLimitsExcept", S_P_STRING},
{"PropagateResourceLimits", S_P_STRING},
+ {"RebootProgram", S_P_STRING},
+ {"ReconfigFlags", S_P_STRING},
{"ResumeProgram", S_P_STRING},
{"ResumeRate", S_P_UINT16},
{"ResumeTimeout", S_P_UINT16},
@@ -254,12 +258,12 @@
{"SelectTypeParameters", S_P_STRING},
{"SlurmUser", S_P_STRING},
{"SlurmdUser", S_P_STRING},
- {"SlurmctldDebug", S_P_UINT16},
+ {"SlurmctldDebug", S_P_STRING},
{"SlurmctldLogFile", S_P_STRING},
{"SlurmctldPidFile", S_P_STRING},
{"SlurmctldPort", S_P_STRING},
{"SlurmctldTimeout", S_P_UINT16},
- {"SlurmdDebug", S_P_UINT16},
+ {"SlurmdDebug", S_P_STRING},
{"SlurmdLogFile", S_P_STRING},
{"SlurmdPidFile", S_P_STRING},
{"SlurmdPort", S_P_UINT32},
@@ -465,9 +469,9 @@
s_p_get_string(&n->reason, "Reason", dflt);
if (!s_p_get_string(&node_state, "State", tbl) &&
- !s_p_get_string(&node_state, "State", dflt))
+ !s_p_get_string(&node_state, "State", dflt)) {
n->node_state = NODE_STATE_UNKNOWN;
- else {
+ } else {
n->node_state = state_str2int(node_state,
(char *) value);
if (n->node_state == (uint16_t) NO_VAL)
@@ -808,6 +812,7 @@
{"PreemptMode", S_P_STRING},
{"Priority", S_P_UINT16},
{"RootOnly", S_P_BOOLEAN}, /* YES or NO */
+ {"ReqResv", S_P_BOOLEAN}, /* YES or NO */
{"Shared", S_P_STRING}, /* YES, NO, or FORCE */
{"State", S_P_STRING}, /* UP, DOWN, INACTIVE or DRAIN */
{NULL}
@@ -951,6 +956,10 @@
&& !s_p_get_boolean(&p->root_only_flag, "RootOnly", dflt))
p->root_only_flag = false;
+ if (!s_p_get_boolean(&p->req_resv_flag, "ReqResv", tbl)
+ && !s_p_get_boolean(&p->req_resv_flag, "ReqResv", dflt))
+ p->req_resv_flag = false;
+
if (s_p_get_string(&tmp, "PreemptMode", tbl) ||
s_p_get_string(&tmp, "PreemptMode", dflt)) {
p->preempt_mode = preempt_mode_num(tmp);
@@ -1170,7 +1179,8 @@
static void _push_to_hashtbls(char *alias, char *hostname,
char *address, uint16_t port,
uint16_t cpus, uint16_t sockets,
- uint16_t cores, uint16_t threads)
+ uint16_t cores, uint16_t threads,
+ bool front_end)
{
int hostname_idx, alias_idx;
names_ll_t *p, *new;
@@ -1194,6 +1204,10 @@
p = node_to_host_hashtbl[alias_idx];
while (p) {
if (strcmp(p->alias, alias)==0) {
+ if (front_end)
+ fatal("Frontend not configured correctly "
+ "in slurm.conf. See man slurm.conf "
+ "look for frontendname.");
fatal("Duplicated NodeName %s in the config file",
p->alias);
return;
@@ -1313,7 +1327,7 @@
hostname = hostlist_shift(hostname_list);
_push_to_hashtbls(alias, hostname, address, node_ptr->port,
node_ptr->cpus, node_ptr->sockets,
- node_ptr->cores, node_ptr->threads);
+ node_ptr->cores, node_ptr->threads, 0);
free(alias);
if (address_count > 1) {
address_count--;
@@ -1378,7 +1392,7 @@
address = hostlist_shift(address_list);
_push_to_hashtbls(hostname, hostname, address,
- front_end_ptr->port, 1, 1, 1, 1);
+ front_end_ptr->port, 1, 1, 1, 1, 1);
free(hostname);
free(address);
}
@@ -1652,6 +1666,40 @@
}
/*
+ * slurm_reset_alias - Reset the address and hostname of a specific node name
+ */
+extern void slurm_reset_alias(char *node_name, char *node_addr,
+ char *node_hostname)
+{
+ int idx;
+ names_ll_t *p;
+
+ slurm_conf_lock();
+ _init_slurmd_nodehash();
+
+ idx = _get_hash_idx(node_name);
+ p = node_to_host_hashtbl[idx];
+ while (p) {
+ if (strcmp(p->alias, node_name) == 0) {
+ if (node_addr) {
+ xfree(p->address);
+ p->address = xstrdup(node_addr);
+ p->addr_initialized = false;
+ }
+ if (node_hostname) {
+ xfree(p->hostname);
+ p->hostname = xstrdup(node_hostname);
+ }
+ break;
+ }
+ p = p->next_alias;
+ }
+ slurm_conf_unlock();
+
+ return;
+}
+
+/*
* slurm_conf_get_addr - Return the slurm_addr_t for a given NodeName
* Returns SLURM_SUCCESS on success, SLURM_FAILURE on failure.
*/
@@ -1807,6 +1855,7 @@
xfree (ctl_conf_ptr->prolog_slurmctld);
xfree (ctl_conf_ptr->propagate_rlimits);
xfree (ctl_conf_ptr->propagate_rlimits_except);
+ xfree (ctl_conf_ptr->reboot_program);
xfree (ctl_conf_ptr->resume_program);
xfree (ctl_conf_ptr->salloc_default_command);
xfree (ctl_conf_ptr->sched_logfile);
@@ -1923,6 +1972,8 @@
ctl_conf_ptr->propagate_prio_process = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->propagate_rlimits);
xfree (ctl_conf_ptr->propagate_rlimits_except);
+ xfree (ctl_conf_ptr->reboot_program);
+ ctl_conf_ptr->reconfig_flags = 0;
ctl_conf_ptr->resume_timeout = 0;
xfree (ctl_conf_ptr->resume_program);
ctl_conf_ptr->resume_rate = (uint16_t) NO_VAL;
@@ -1984,6 +2035,74 @@
return;
}
+/* handle config name in form (example) slurmdbd:cluster0:10.0.0.254:6819
+ *
+ * NOTE: Changes are required in the accounting_storage/slurmdbd plugin in
+ * order for this to work as desired. Andriy Grytsenko (Massive Solutions
+ * Limited) has a private accounting_storage plugin with this functionality */
+static int _config_is_storage(s_p_hashtbl_t *hashtbl, char *name)
+{
+ char *cluster, *host, *port;
+ void *db_conn;
+ config_key_pair_t *pair;
+ List config;
+ ListIterator iter;
+ int rc = -1;
+
+ cluster = strchr(name, ':');
+ if (cluster == NULL)
+ return (-1);
+ host = strchr(&cluster[1], ':');
+ if (host == NULL)
+ return (-1);
+ port = strrchr(&host[1], ':');
+ if (port == NULL)
+ return (-1);
+ conf_ptr->accounting_storage_type = xstrdup_printf("accounting_storage/%.*s",
+ (int)(cluster - name), name);
+ cluster++;
+ cluster = xstrndup(cluster, host - cluster);
+ host++;
+ conf_ptr->accounting_storage_host = xstrndup(host, port - host);
+ port++;
+ debug3("trying retrieve config via %s from host %s on port %s",
+ conf_ptr->accounting_storage_type,
+ conf_ptr->accounting_storage_host, port);
+ conf_ptr->accounting_storage_port = atoi(port);
+ conf_ptr->plugindir = xstrdup(default_plugin_path);
+ /* unlock conf_lock and set as initialized before accessing it */
+ conf_initialized = true;
+ pthread_mutex_unlock(&conf_lock);
+ db_conn = acct_storage_g_get_connection(NULL, 0, false, NULL);
+ if (db_conn == NULL)
+ goto end; /* plugin will report the error itself */
+ config = acct_storage_g_get_config(db_conn, "slurm.conf");
+ acct_storage_g_close_connection(&db_conn); /* ignore error code */
+ if (config == NULL) {
+ error("cannot retrieve config from storage");
+ goto end;
+ }
+ iter = list_iterator_create(config);
+ while ((pair = list_next(iter)) != NULL)
+ s_p_parse_pair(hashtbl, pair->name, pair->value);
+ list_iterator_destroy(iter);
+ list_destroy(config);
+ rc = 0; /* done */
+
+end:
+ /* restore status quo now */
+ pthread_mutex_lock(&conf_lock);
+ conf_initialized = false;
+ xfree(cluster);
+ xfree(conf_ptr->accounting_storage_type);
+ xfree(conf_ptr->accounting_storage_host);
+ xfree(conf_ptr->plugindir);
+ conf_ptr->accounting_storage_type = NULL;
+ conf_ptr->accounting_storage_host = NULL;
+ conf_ptr->plugindir = NULL;
+ return (rc);
+}
+
/* caller must lock conf_lock */
static void _init_slurm_conf(const char *file_name)
{
@@ -1995,7 +2114,7 @@
if (name == NULL)
name = default_slurm_config_file;
}
- if(conf_initialized) {
+ if (conf_initialized) {
error("the conf_hashtbl is already inited");
}
conf_hashtbl = s_p_hashtbl_create(slurm_conf_options);
@@ -2003,9 +2122,11 @@
/* init hash to 0 */
conf_ptr->hash_val = 0;
- if (s_p_parse_file(conf_hashtbl, &conf_ptr->hash_val, name, false)
- == SLURM_ERROR)
+ if ((_config_is_storage(conf_hashtbl, name) < 0) &&
+ (s_p_parse_file(conf_hashtbl, &conf_ptr->hash_val, name, false)
+ == SLURM_ERROR)) {
fatal("something wrong with opening/reading conf file");
+ }
/* s_p_dump_values(conf_hashtbl, slurm_conf_options); */
_validate_and_set_defaults(conf_ptr, conf_hashtbl);
conf_ptr->slurm_conf = xstrdup(name);
@@ -2115,7 +2236,7 @@
}
extern void
-slurm_conf_install_fork_handlers()
+slurm_conf_install_fork_handlers(void)
{
int err;
if ((err = pthread_atfork(NULL, NULL, &slurm_conf_mutex_init)))
@@ -2215,10 +2336,10 @@
s_p_get_string(&conf->cluster_name, "ClusterName", hashtbl);
/* Some databases are case sensitive so we have to make sure
- the cluster name is lower case since sacctmgr makes sure
- this is the case as well.
- */
- if(conf->cluster_name) {
+ * the cluster name is lower case since sacctmgr makes sure
+ * this is the case as well.
+ */
+ if (conf->cluster_name) {
int i;
for (i = 0; conf->cluster_name[i] != '\0'; i++)
conf->cluster_name[i] =
@@ -2343,16 +2464,16 @@
s_p_get_string(&conf->gres_plugins, "GresTypes", hashtbl);
if (s_p_get_uint16(&conf->inactive_limit, "InactiveLimit", hashtbl)) {
-#ifdef HAVE_BG
- /* Inactive limit must be zero on Blue Gene */
+#ifdef HAVE_BG_L_P
+ /* Inactive limit must be zero on BlueGene L/P */
if (conf->inactive_limit) {
- error("InactiveLimit=%d is invalid on Blue Gene",
+ error("InactiveLimit=%d is invalid on BlueGene L/P",
conf->inactive_limit);
}
conf->inactive_limit = 0;
#endif
} else {
-#ifdef HAVE_BG
+#ifdef HAVE_BG_L_P
conf->inactive_limit = 0;
#endif
conf->inactive_limit = DEFAULT_INACTIVE_LIMIT;
@@ -2698,6 +2819,11 @@
else
conf->priority_favor_small = 0;
+ conf->priority_flags = 0;
+ if (s_p_get_string(&temp_str, "PriorityFlags", hashtbl)) {
+ if (strstr(temp_str, "ACCRUE_ALWAYS"))
+ conf->priority_flags |= PRIORITY_FLAGS_ACCRUE_ALWAYS;
+ }
if (s_p_get_string(&temp_str, "PriorityMaxAge", hashtbl)) {
int max_time = time_str2mins(temp_str);
if ((max_time < 0) && (max_time != INFINITE)) {
@@ -2827,6 +2953,14 @@
conf->propagate_rlimits);
}
+ if (s_p_get_string(&temp_str, "ReconfigFlags", hashtbl)) {
+ conf->reconfig_flags = reconfig_str2flags(temp_str);
+ if (conf->reconfig_flags == 0xffff)
+ fatal("ReconfigFlags invalid: %s", temp_str);
+ xfree(temp_str);
+ } else /* Default: no ReconfigFlags */
+ conf->reconfig_flags = 0;
+
if (!s_p_get_uint16(&conf->ret2service, "ReturnToService", hashtbl))
conf->ret2service = DEFAULT_RETURN_TO_SERVICE;
#ifdef HAVE_CRAY
@@ -2842,6 +2976,8 @@
if (!s_p_get_uint16(&conf->resume_timeout, "ResumeTimeout", hashtbl))
conf->resume_timeout = DEFAULT_RESUME_TIMEOUT;
+ s_p_get_string(&conf->reboot_program, "RebootProgram", hashtbl);
+
s_p_get_string(&conf->salloc_default_command, "SallocDefaultCommand",
hashtbl);
@@ -2946,9 +3082,13 @@
}
}
- if (s_p_get_uint16(&conf->slurmctld_debug, "SlurmctldDebug", hashtbl))
+ if (s_p_get_string(&temp_str, "SlurmctldDebug", hashtbl)) {
+ conf->slurmctld_debug = log_string2num(temp_str);
+ if (conf->slurmctld_debug == (uint16_t) NO_VAL)
+ fatal("Invalid SlurmctldDebug %s", temp_str);
+ xfree(temp_str);
_normalize_debug_level(&conf->slurmctld_debug);
- else
+ } else
conf->slurmctld_debug = LOG_LEVEL_INFO;
if (!s_p_get_string(&conf->slurmctld_pidfile,
@@ -2991,9 +3131,13 @@
"SlurmctldTimeout", hashtbl))
conf->slurmctld_timeout = DEFAULT_SLURMCTLD_TIMEOUT;
- if (s_p_get_uint16(&conf->slurmd_debug, "SlurmdDebug", hashtbl))
+ if (s_p_get_string(&temp_str, "SlurmdDebug", hashtbl)) {
+ conf->slurmd_debug = log_string2num(temp_str);
+ if (conf->slurmd_debug == (uint16_t) NO_VAL)
+ fatal("Invalid SlurmdDebug %s", temp_str);
+ xfree(temp_str);
_normalize_debug_level(&conf->slurmd_debug);
- else
+ } else
conf->slurmd_debug = LOG_LEVEL_INFO;
s_p_get_string(&conf->slurmd_logfile, "SlurmdLogFile", hashtbl);
@@ -3221,6 +3365,11 @@
xstrcat(rc, ",");
xstrcat(rc, "NO_CONF_HASH");
}
+ if (debug_flags & DEBUG_FLAG_NO_REALTIME) {
+ if (rc)
+ xstrcat(rc, ",");
+ xstrcat(rc, "NoRealTime");
+ }
if (debug_flags & DEBUG_FLAG_PRIO) {
if (rc)
xstrcat(rc, ",");
@@ -3291,6 +3440,8 @@
rc |= DEBUG_FLAG_GRES;
else if (strcasecmp(tok, "NO_CONF_HASH") == 0)
rc |= DEBUG_FLAG_NO_CONF_HASH;
+ else if (strcasecmp(tok, "NoRealTime") == 0)
+ rc |= DEBUG_FLAG_NO_REALTIME;
else if (strcasecmp(tok, "Priority") == 0)
rc |= DEBUG_FLAG_PRIO;
else if (strcasecmp(tok, "Reservation") == 0)
@@ -3319,6 +3470,53 @@
return rc;
}
+/*
+ * reconfig_flags2str - convert a ReconfFlags uint16_t to the equivalent string
+ * Keep in sync with reconfig_str2flags() below
+ */
+extern char * reconfig_flags2str(uint16_t reconfig_flags)
+{
+ char *rc = NULL;
+
+ if (reconfig_flags & RECONFIG_KEEP_PART_INFO) {
+ if (rc)
+ xstrcat(rc, ",");
+ xstrcat(rc, "KeepPartInfo");
+ }
+
+ return rc;
+}
+
+/*
+ * reconfig_str2flags - Convert a ReconfFlags string to the equivalent uint16_t
+ * Keep in sync with reconfig_flags2str() above
+ * Returns NO_VAL if invalid
+ */
+extern uint16_t reconfig_str2flags(char *reconfig_flags)
+{
+ uint16_t rc = 0;
+ char *tmp_str, *tok, *last = NULL;
+
+ if (!reconfig_flags)
+ return rc;
+
+ tmp_str = xstrdup(reconfig_flags);
+ tok = strtok_r(tmp_str, ",", &last);
+ while (tok) {
+ if (strcasecmp(tok, "KeepPartInfo") == 0)
+ rc |= RECONFIG_KEEP_PART_INFO;
+ else {
+ error("Invalid ReconfigFlag: %s", tok);
+ rc = (uint16_t) NO_VAL;
+ break;
+ }
+ tok = strtok_r(NULL, ",", &last);
+ }
+ xfree(tmp_str);
+
+ return rc;
+}
+
extern void destroy_config_key_pair(void *object)
{
config_key_pair_t *key_pair_ptr = (config_key_pair_t *)object;
diff --git a/src/common/read_config.h b/src/common/read_config.h
index 05fc4fc..ad404bb 100644
--- a/src/common/read_config.h
+++ b/src/common/read_config.h
@@ -109,10 +109,11 @@
#define DEFAULT_PRIORITY_DECAY 604800 /* 7 days */
#define DEFAULT_PRIORITY_CALC_PERIOD 300 /* in seconds */
#define DEFAULT_PRIORITY_TYPE "priority/basic"
+#define DEFAULT_RECONF_KEEP_PART_STATE 0
#define DEFAULT_RETURN_TO_SERVICE 0
#define DEFAULT_RESUME_RATE 300
#define DEFAULT_RESUME_TIMEOUT 60
-#define DEFAULT_SAVE_STATE_LOC "/tmp"
+#define DEFAULT_SAVE_STATE_LOC "/var/spool"
#define DEFAULT_SCHEDROOTFILTER 1
#define DEFAULT_SCHEDULER_PORT 7321
#define DEFAULT_SCHED_LOG_LEVEL 0
@@ -205,6 +206,8 @@
char *nodes; /* comma delimited list names of nodes */
uint16_t preempt_mode; /* See PREEMPT_MODE_* in slurm/slurm.h */
uint16_t priority; /* scheduling priority for jobs */
+ bool req_resv_flag; /* 1 if partition can only be used in a
+ * reservation */
bool root_only_flag;/* 1 if allocate/submit RPC can only be
issued by user root */
uint16_t state_up; /* for states see PARTITION_* in slurm.h */
@@ -270,7 +273,7 @@
/* slurm_conf_install_fork_handlers
* installs what to do with a fork with the conf mutex
*/
-void slurm_conf_install_fork_handlers();
+void slurm_conf_install_fork_handlers(void);
/*
* NOTE: Caller must NOT be holding slurm_conf_lock().
@@ -315,6 +318,12 @@
extern int slurm_conf_downnodes_array(slurm_conf_downnodes_t **ptr_array[]);
/*
+ * slurm_reset_alias - Reset the address and hostname of a specific node name
+ */
+extern void slurm_reset_alias(char *node_name, char *node_addr,
+ char *node_hostname);
+
+/*
* slurm_conf_get_hostname - Return the NodeHostname for given NodeName
*
* Returned string was allocated with xmalloc(), and must be freed by
@@ -437,6 +446,18 @@
*/
extern uint32_t debug_str2flags(char *debug_flags);
+/*
+ * reconfig_flags2str - convert a ReconfigFlags uint16_t to the equivalent string
+ * Returns an xmalloc()ed string which the caller must free with xfree().
+ */
+extern char *reconfig_flags2str(uint16_t reconfig_flags);
+
+/*
+ * reconfig_str2flags - Convert a ReconfigFlags string to the equivalent uint16_t
+ * Returns NO_VAL if invalid
+ */
+extern uint16_t reconfig_str2flags(char *reconfig_flags);
+
extern void destroy_config_key_pair(void *object);
extern void pack_config_key_pair(void *in, uint16_t rpc_version, Buf buffer);
extern int unpack_config_key_pair(void **object, uint16_t rpc_version,
diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c
index 937a9fc..575edb6 100644
--- a/src/common/slurm_accounting_storage.c
+++ b/src/common/slurm_accounting_storage.c
@@ -134,7 +134,7 @@
slurmdb_account_cond_t *acct_cond);
List (*get_clusters) (void *db_conn, uint32_t uid,
slurmdb_cluster_cond_t *cluster_cond);
- List (*get_config) (void *db_conn);
+ List (*get_config) (void *db_conn, char *config_name);
List (*get_associations) (void *db_conn, uint32_t uid,
slurmdb_association_cond_t *assoc_cond);
List (*get_events) (void *db_conn, uint32_t uid,
@@ -201,6 +201,7 @@
static slurm_acct_storage_context_t * g_acct_storage_context = NULL;
static pthread_mutex_t g_acct_storage_context_lock =
PTHREAD_MUTEX_INITIALIZER;
+static bool init_run = false;
/*
* Local functions
@@ -388,6 +389,9 @@
int retval = SLURM_SUCCESS;
char *acct_storage_type = NULL;
+ if ( init_run && g_acct_storage_context )
+ return retval;
+
slurm_mutex_lock( &g_acct_storage_context_lock );
if ( g_acct_storage_context )
@@ -411,7 +415,8 @@
_acct_storage_context_destroy( g_acct_storage_context );
g_acct_storage_context = NULL;
retval = SLURM_ERROR;
- }
+ } else
+ init_run = true;
done:
slurm_mutex_unlock( &g_acct_storage_context_lock );
@@ -426,6 +431,7 @@
if (!g_acct_storage_context)
return SLURM_SUCCESS;
+ init_run = false;
// (*(g_acct_storage_context->ops.acct_storage_fini))();
rc = _acct_storage_context_destroy( g_acct_storage_context );
g_acct_storage_context = NULL;
@@ -713,11 +719,11 @@
(db_conn, uid, cluster_cond);
}
-extern List acct_storage_g_get_config(void *db_conn)
+extern List acct_storage_g_get_config(void *db_conn, char *config_name)
{
if (slurm_acct_storage_init(NULL) < 0)
return NULL;
- return (*(g_acct_storage_context->ops.get_config))(db_conn);
+ return (*(g_acct_storage_context->ops.get_config))(db_conn, config_name);
}
extern List acct_storage_g_get_associations(
diff --git a/src/common/slurm_accounting_storage.h b/src/common/slurm_accounting_storage.h
index 2b9bd79..881fcdc 100644
--- a/src/common/slurm_accounting_storage.h
+++ b/src/common/slurm_accounting_storage.h
@@ -327,7 +327,7 @@
* RET: List of config_key_pairs_t *
* note List needs to be freed when called
*/
-extern List acct_storage_g_get_config(void *db_conn);
+extern List acct_storage_g_get_config(void *db_conn, char *config_name);
/*
* get info from the storage
diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c
index 264c605..12ad31e 100644
--- a/src/common/slurm_auth.c
+++ b/src/common/slurm_auth.c
@@ -106,6 +106,7 @@
*/
static slurm_auth_context_t g_context = NULL;
static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER;
+static bool init_run = false;
/*
* Order of advisory arguments passed to some of the plugins.
@@ -315,6 +316,9 @@
int retval = SLURM_SUCCESS;
char *auth_type_local = NULL;
+ if ( init_run && g_context )
+ return retval;
+
slurm_mutex_lock( &context_lock );
if ( g_context )
@@ -343,7 +347,8 @@
_slurm_auth_context_destroy( g_context );
g_context = NULL;
retval = SLURM_ERROR;
- }
+ } else
+ init_run = true;
done:
xfree(auth_type_local);
@@ -360,6 +365,7 @@
if ( !g_context )
return SLURM_SUCCESS;
+ init_run = false;
rc = _slurm_auth_context_destroy( g_context );
g_context = NULL;
return rc;
diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c
index f43cf56..182db0e 100644
--- a/src/common/slurm_cred.c
+++ b/src/common/slurm_cred.c
@@ -78,8 +78,9 @@
*/
#define DEFAULT_EXPIRATION_WINDOW 1200
+#define EXTREME_DEBUG 0
#define MAX_TIME 0x7fffffff
-#define SBCAST_CACHE_SIZE 64
+#define SBCAST_CACHE_SIZE 256
/*
* slurm job credential state
@@ -409,6 +410,9 @@
char *crypto_type = NULL;
int retval = SLURM_SUCCESS;
+ if ( g_crypto_context ) /* mostly avoid locks for better speed */
+ return SLURM_SUCCESS;
+
slurm_mutex_lock( &g_crypto_context_lock );
if ( g_crypto_context )
goto done;
@@ -644,8 +648,12 @@
#ifndef HAVE_BG
{
int i, sock_recs = 0;
+#ifndef HAVE_CRAY
+ /* Zero compute node allocations allowed on a Cray for use
+ * of front-end nodes */
xassert(arg->job_nhosts);
- for (i=0; i<arg->job_nhosts; i++) {
+#endif
+ for (i = 0; i < arg->job_nhosts; i++) {
sock_recs += arg->sock_core_rep_count[i];
if (sock_recs >= arg->job_nhosts)
break;
@@ -1631,7 +1639,7 @@
}
-#ifdef EXTREME_DEBUG
+#if EXTREME_DEBUG
static void
_print_data(char *data, int datalen)
{
@@ -1909,14 +1917,16 @@
static void
_clear_expired_job_states(slurm_cred_ctx_t ctx)
{
- char t1[64], t2[64], t3[64];
time_t now = time(NULL);
ListIterator i = NULL;
job_state_t *j = NULL;
i = list_iterator_create(ctx->job_list);
-
+ if (!i)
+ fatal("list_iterator_create: malloc failure");
while ((j = list_next(i))) {
+#if EXTREME_DEBUG
+ char t1[64], t2[64], t3[64];
if (j->revoked) {
strcpy(t2, " revoked:");
timestr(&j->revoked, (t2+9), (64-9));
@@ -1931,7 +1941,7 @@
}
debug3("state for jobid %u: ctime:%s%s%s",
j->jobid, timestr(&j->ctime, t1, 64), t2, t3);
-
+#endif
if (j->revoked && (now > j->expiration)) {
list_delete_item(i);
}
@@ -2167,7 +2177,8 @@
* including digital signature.
* RET the sbcast credential or NULL on error */
sbcast_cred_t *create_sbcast_cred(slurm_cred_ctx_t ctx,
- uint32_t job_id, char *nodes)
+ uint32_t job_id, char *nodes,
+ time_t expiration)
{
Buf buffer;
int rc;
@@ -2180,7 +2191,7 @@
sbcast_cred = xmalloc(sizeof(struct sbcast_cred));
sbcast_cred->ctime = now;
- sbcast_cred->expiration = now + DEFAULT_EXPIRATION_WINDOW;
+ sbcast_cred->expiration = expiration;
sbcast_cred->jobid = job_id;
sbcast_cred->nodes = xstrdup(nodes);
@@ -2261,9 +2272,12 @@
_pack_sbcast_cred(sbcast_cred, buffer);
/* NOTE: the verification checks that the credential was
* created by SlurmUser or root */
- rc = (*(g_crypto_context->ops.crypto_verify_sign))(ctx->key,
- get_buf_data(buffer), get_buf_offset(buffer),
- sbcast_cred->signature, sbcast_cred->siglen);
+ rc = (*(g_crypto_context->ops.crypto_verify_sign)) (
+ ctx->key,
+ get_buf_data(buffer),
+ get_buf_offset(buffer),
+ sbcast_cred->signature,
+ sbcast_cred->siglen);
free_buf(buffer);
if (rc) {
@@ -2276,7 +2290,7 @@
* and reduces the possibility of a duplicate value */
for (i=0; i<sbcast_cred->siglen; i+=2) {
sig_num += (sbcast_cred->signature[i] << 8) +
- sbcast_cred->signature[i+1];
+ sbcast_cred->signature[i+1];
}
/* add to cache */
for (i=0; i<SBCAST_CACHE_SIZE; i++) {
@@ -2297,13 +2311,13 @@
/* overwrite the oldest */
cache_expire[oldest_cache_inx] = sbcast_cred->
- expiration;
+ expiration;
cache_value[oldest_cache_inx] = sig_num;
}
} else {
for (i=0; i<sbcast_cred->siglen; i+=2) {
sig_num += (sbcast_cred->signature[i] << 8) +
- sbcast_cred->signature[i+1];
+ sbcast_cred->signature[i+1];
}
for (i=0; i<SBCAST_CACHE_SIZE; i++) {
if ((cache_expire[i] == sbcast_cred->expiration) &&
@@ -2361,4 +2375,5 @@
info("Sbcast_cred: Jobid %u", sbcast_cred->jobid );
info("Sbcast_cred: Nodes %s", sbcast_cred->nodes );
info("Sbcast_cred: ctime %s", ctime(&sbcast_cred->ctime) );
+ info("Sbcast_cred: Expire %s", ctime(&sbcast_cred->expiration) );
}
diff --git a/src/common/slurm_cred.h b/src/common/slurm_cred.h
index e3469d9..98312da 100644
--- a/src/common/slurm_cred.h
+++ b/src/common/slurm_cred.h
@@ -129,7 +129,7 @@
*
* On pack() ctx is packed in machine-independent format into the
* buffer, on unpack() the contents of the buffer are used to
- * intialize the state of the context ctx.
+ * initialize the state of the context ctx.
*/
int slurm_cred_ctx_pack(slurm_cred_ctx_t ctx, Buf buffer);
int slurm_cred_ctx_unpack(slurm_cred_ctx_t ctx, Buf buffer);
@@ -337,7 +337,8 @@
* Caller of extract_sbcast_cred() must xfree returned node string
*/
sbcast_cred_t *create_sbcast_cred(slurm_cred_ctx_t ctx,
- uint32_t job_id, char *nodes);
+ uint32_t job_id, char *nodes,
+ time_t expiration);
sbcast_cred_t *copy_sbcast_cred(sbcast_cred_t *sbcast_cred);
void delete_sbcast_cred(sbcast_cred_t *sbcast_cred);
int extract_sbcast_cred(slurm_cred_ctx_t ctx,
diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c
index df158b9..e6cd4d8 100644
--- a/src/common/slurm_errno.c
+++ b/src/common/slurm_errno.c
@@ -280,6 +280,8 @@
"Step limit reached for this job" },
{ ESLURM_JOB_SUSPENDED,
"Job is current suspended, requested operation disabled" },
+ { ESLURM_CAN_NOT_START_IMMEDIATELY,
+ "Job can not start immediately" },
/* slurmd error codes */
@@ -379,7 +381,7 @@
{ ESLURM_DB_CONNECTION,
"Unable to connect to database" },
{ ESLURM_JOBS_RUNNING_ON_ASSOC,
- "Job(s) running, cancel job(s) before remove" },
+ "Job(s) active, cancel job(s) before remove" },
{ ESLURM_CLUSTER_DELETED,
"Cluster deleted, commit/rollback immediately" },
{ ESLURM_ONE_CHANGE,
diff --git a/src/common/slurm_jobacct_gather.c b/src/common/slurm_jobacct_gather.c
index 44615ce..b927c9c 100644
--- a/src/common/slurm_jobacct_gather.c
+++ b/src/common/slurm_jobacct_gather.c
@@ -107,6 +107,7 @@
static slurm_jobacct_gather_context_t *g_jobacct_gather_context = NULL;
static pthread_mutex_t g_jobacct_gather_context_lock = PTHREAD_MUTEX_INITIALIZER;
+static bool init_run = false;
static int _slurm_jobacct_gather_init(void);
@@ -258,6 +259,9 @@
char *jobacct_gather_type = NULL;
int retval=SLURM_SUCCESS;
+ if (init_run && g_jobacct_gather_context )
+ return(retval);
+
slurm_mutex_lock( &g_jobacct_gather_context_lock );
if ( g_jobacct_gather_context )
goto done;
@@ -278,7 +282,8 @@
g_jobacct_gather_context);
g_jobacct_gather_context = NULL;
retval = SLURM_ERROR;
- }
+ } else
+ init_run = true;
done:
slurm_mutex_unlock( &g_jobacct_gather_context_lock );
@@ -297,13 +302,16 @@
extern int slurm_jobacct_gather_fini(void)
{
- int rc;
+ int rc = SLURM_SUCCESS;
- if (!g_jobacct_gather_context)
- return SLURM_SUCCESS;
-
- rc = _slurm_jobacct_gather_context_destroy(g_jobacct_gather_context);
- g_jobacct_gather_context = NULL;
+ slurm_mutex_lock( &g_jobacct_gather_context_lock );
+ if (g_jobacct_gather_context) {
+ init_run = false;
+ rc = _slurm_jobacct_gather_context_destroy(
+ g_jobacct_gather_context);
+ g_jobacct_gather_context = NULL;
+ }
+ slurm_mutex_unlock( &g_jobacct_gather_context_lock );
return rc;
}
diff --git a/src/common/slurm_priority.c b/src/common/slurm_priority.c
index 1011723..8a75799 100644
--- a/src/common/slurm_priority.c
+++ b/src/common/slurm_priority.c
@@ -49,7 +49,7 @@
double (*calc_fs_factor) (long double usage_efctv,
long double shares_norm);
List (*get_priority_factors)
- (priority_factors_request_msg_t *req_msg);
+ (priority_factors_request_msg_t *req_msg, uid_t uid);
} slurm_priority_ops_t;
typedef struct slurm_priority_context {
@@ -63,6 +63,7 @@
static slurm_priority_context_t * g_priority_context = NULL;
static pthread_mutex_t g_priority_context_lock =
PTHREAD_MUTEX_INITIALIZER;
+static bool init_run = false;
/*
* Local functions
@@ -198,6 +199,9 @@
int retval = SLURM_SUCCESS;
char *priority_type = NULL;
+ if ( init_run && g_priority_context )
+ return retval;
+
slurm_mutex_lock( &g_priority_context_lock );
if ( g_priority_context )
@@ -218,7 +222,8 @@
_priority_context_destroy( g_priority_context );
g_priority_context = NULL;
retval = SLURM_ERROR;
- }
+ } else
+ init_run = true;
done:
slurm_mutex_unlock( &g_priority_context_lock );
@@ -233,6 +238,7 @@
if (!g_priority_context)
return SLURM_SUCCESS;
+ init_run = false;
rc = _priority_context_destroy( g_priority_context );
g_priority_context = NULL;
return rc;
@@ -276,10 +282,10 @@
}
extern List priority_g_get_priority_factors_list(
- priority_factors_request_msg_t *req_msg)
+ priority_factors_request_msg_t *req_msg, uid_t uid)
{
if (slurm_priority_init() < 0)
return NULL;
- return (*(g_priority_context->ops.get_priority_factors))(req_msg);
+ return (*(g_priority_context->ops.get_priority_factors))(req_msg, uid);
}
diff --git a/src/common/slurm_priority.h b/src/common/slurm_priority.h
index 65da5b3..86c087f 100644
--- a/src/common/slurm_priority.h
+++ b/src/common/slurm_priority.h
@@ -65,6 +65,6 @@
extern double priority_g_calc_fs_factor(long double usage_efctv,
long double shares_norm);
extern List priority_g_get_priority_factors_list(
- priority_factors_request_msg_t *req_msg);
+ priority_factors_request_msg_t *req_msg, uid_t uid);
#endif /*_SLURM_PRIORIY_H */
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index d7c679d..02fd6ac 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -1504,6 +1504,24 @@
return 0;
}
+/* slurm_get_kill_wait
+ * returns kill_wait from slurmctld_conf object
+ * RET uint16_t - kill_wait
+ */
+uint16_t slurm_get_kill_wait(void)
+{
+ uint16_t kill_wait = 0;
+ slurm_ctl_conf_t *conf;
+
+ if (slurmdbd_conf) {
+ } else {
+ conf = slurm_conf_lock();
+ kill_wait = conf->kill_wait;
+ slurm_conf_unlock();
+ }
+ return kill_wait;
+}
+
/* slurm_get_preempt_type
* get PreemptType from slurmctld_conf object
* RET char * - preempt type, MUST be xfreed by caller
@@ -2374,6 +2392,7 @@
/*
* Unpack message body
*/
+ msg.protocol_version = header.version;
msg.msg_type = header.msg_type;
msg.flags = header.flags;
@@ -2604,6 +2623,7 @@
/*
* Unpack message body
*/
+ msg->protocol_version = header.version;
msg->msg_type = header.msg_type;
msg->flags = header.flags;
@@ -3648,6 +3668,50 @@
#endif
+/*
+ * slurm_forward_data - forward arbitrary data to unix domain sockets on nodes
+ * IN nodelist: nodes to forward data to
+ * IN address: address of unix domain socket
+ * IN len: length of data
+ * IN data: real data
+ * RET: error code
+ */
+extern int
+slurm_forward_data(char *nodelist, char *address, uint32_t len, char *data)
+{
+ List ret_list = NULL;
+ int temp_rc = 0, rc = 0;
+ ret_data_info_t *ret_data_info = NULL;
+ slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));
+ forward_data_msg_t req;
+
+ slurm_msg_t_init(msg);
+
+ debug("slurm_forward_data: nodelist=%s, address=%s, len=%u",
+ nodelist, address, len);
+ req.address = address;
+ req.len = len;
+ req.data = data;
+
+ msg->msg_type = REQUEST_FORWARD_DATA;
+ msg->data = &req;
+
+ if ((ret_list = slurm_send_recv_msgs(nodelist, msg, 0, false))) {
+ while ((ret_data_info = list_pop(ret_list))) {
+ temp_rc = slurm_get_return_code(ret_data_info->type,
+ ret_data_info->data);
+ if (temp_rc)
+ rc = temp_rc;
+ }
+ } else {
+ error("slurm_forward_data: no list was returned");
+ rc = SLURM_ERROR;
+ }
+
+ slurm_free_msg(msg);
+ return rc;
+}
+
/*
* vi: shiftwidth=8 tabstop=8 expandtab
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index 55d9b96..584ff22 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -503,6 +503,12 @@
*/
int slurm_set_jobcomp_port(uint32_t port);
+/* slurm_get_kill_wait
+ * returns kill_wait from slurmctld_conf object
+ * RET uint16_t - kill_wait
+ */
+uint16_t slurm_get_kill_wait(void);
+
/* slurm_get_preempt_type
* get PreemptType from slurmctld_conf object
* RET char * - preempt type, MUST be xfreed by caller
@@ -1082,4 +1088,18 @@
extern int slurm_job_step_create (
job_step_create_request_msg_t *slurm_step_alloc_req_msg,
job_step_create_response_msg_t **slurm_step_alloc_resp_msg);
+
+
+/* Should this be in <slurm/slurm.h> ? */
+/*
+ * slurm_forward_data - forward arbitrary data to unix domain sockets on nodes
+ * IN nodelist: nodes to forward data to
+ * IN address: address of unix domain socket
+ * IN len: length of data
+ * IN data: real data
+ * RET: error code
+ */
+extern int slurm_forward_data(char *nodelist, char *address, uint32_t len,
+ char *data);
+
#endif
diff --git a/src/common/slurm_protocol_common.h b/src/common/slurm_protocol_common.h
index 06d80fd..187fd38 100644
--- a/src/common/slurm_protocol_common.h
+++ b/src/common/slurm_protocol_common.h
@@ -70,7 +70,8 @@
* In slurm_protocol_util.c check_header_version(), and init_header()
* need to be updated also when changes are added */
#define SLURM_PROTOCOL_VERSION ((SLURM_API_MAJOR << 8) | SLURM_API_AGE)
-#define SLURM_2_3_PROTOCOL_VERSION SLURM_PROTOCOL_VERSION
+#define SLURM_2_4_PROTOCOL_VERSION SLURM_PROTOCOL_VERSION
+#define SLURM_2_3_PROTOCOL_VERSION ((23 << 8) | 0)
#define SLURM_2_2_PROTOCOL_VERSION ((22 << 8) | 0)
#define SLURM_2_1_PROTOCOL_VERSION ((21 << 8) | 0)
#define SLURM_2_0_PROTOCOL_VERSION ((20 << 8) | 0)
@@ -80,10 +81,6 @@
#define SLURM_PROTOCOL_NO_FLAGS 0
#define SLURM_GLOBAL_AUTH_KEY 0x0001
-#if MONGO_IMPLEMENTATION
-# include "src/common/slurm_protocol_mongo_common.h"
-#else
-# include "src/common/slurm_protocol_socket_common.h"
-#endif
+#include "src/common/slurm_protocol_socket_common.h"
#endif
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 882e3e3..40accfa 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -117,7 +117,7 @@
* values from the "src" slurm_msg_t structure.
* IN src - Pointer to the initialized message from which "dest" will
* be initialized.
- * OUT dest - Pointer to the slurm_msg_t which will be intialized.
+ * OUT dest - Pointer to the slurm_msg_t which will be initialized.
* NOTE: the "dest" structure will contain pointers into the contents of "src".
*/
extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src)
@@ -153,7 +153,7 @@
/* make a buffer 2 times the size just to be safe */
copy = dup = xmalloc((2 * len) + 1);
if (copy)
- do if (*str == '\'' || *str == '"')
+ do if (*str == '\\' || *str == '\'' || *str == '"')
*dup++ = '\\';
while ((*dup++ = *str++));
@@ -281,6 +281,14 @@
xfree(msg);
}
+extern void slurm_free_reboot_msg(reboot_msg_t * msg)
+{
+ if (msg) {
+ xfree(msg->node_list);
+ xfree(msg);
+ }
+}
+
extern void slurm_free_shutdown_msg(shutdown_msg_t * msg)
{
xfree(msg);
@@ -408,6 +416,7 @@
int i;
if (msg) {
+ xfree(msg->alias_list);
xfree(msg->nodes);
xfree(msg->cpu_bind);
xfree(msg->cpus_per_node);
@@ -514,6 +523,8 @@
if (msg) {
xfree(msg->features);
xfree(msg->gres);
+ xfree(msg->node_addr);
+ xfree(msg->node_hostname);
xfree(msg->node_names);
xfree(msg->reason);
xfree(msg);
@@ -543,6 +554,7 @@
xfree(msg->features);
xfree(msg->licenses);
xfree(msg->name);
+ xfree(msg->node_cnt);
xfree(msg->node_list);
xfree(msg->partition);
xfree(msg->users);
@@ -654,6 +666,7 @@
}
xfree(msg->env);
}
+ xfree(msg->alias_list);
xfree(msg->cwd);
xfree(msg->cpu_bind);
xfree(msg->mem_bind);
@@ -843,6 +856,16 @@
xfree(msg);
}
+/*extern void slurm_free_stats_request_msg(stats_desc_msg_t *msg)
+{
+ xfree(msg);
+}*/
+
+extern void slurm_free_stats_response_msg(stats_info_response_msg_t *msg)
+{
+ xfree(msg);
+}
+
extern void slurm_free_spank_env_request_msg(spank_env_request_msg_t *msg)
{
xfree(msg);
@@ -949,6 +972,15 @@
}
}
+inline void slurm_free_forward_data_msg(forward_data_msg_t *msg)
+{
+ if (msg) {
+ xfree(msg->address);
+ xfree(msg->data);
+ xfree(msg);
+ }
+}
+
extern char *preempt_mode_string(uint16_t preempt_mode)
{
char *gang_str;
@@ -1025,6 +1057,70 @@
return mode_num;
}
+/* Convert log level number to equivalent string */
+extern char *log_num2string(uint16_t inx)
+{
+ switch (inx) {
+ case 0:
+ return "quiet";
+ case 1:
+ return "fatal";
+ case 2:
+ return "error";
+ case 3:
+ return "info";
+ case 4:
+ return "verbose";
+ case 5:
+ return "debug";
+ case 6:
+ return "debug2";
+ case 7:
+ return "debug3";
+ case 8:
+ return "debug4";
+ case 9:
+ return "debug5";
+ default:
+ return "unknown";
+ }
+}
+
+/* Convert log level string to equivalent number */
+extern uint16_t log_string2num(char *name)
+{
+ if (name == NULL)
+ return (uint16_t) NO_VAL;
+
+ if ((name[0] >= '0') && (name[0] <= '9'))
+ return (uint16_t) atoi(name);
+
+ if (!strcasecmp(name, "quiet"))
+ return (uint16_t) 0;
+ if (!strcasecmp(name, "fatal"))
+ return (uint16_t) 1;
+ if (!strcasecmp(name, "error"))
+ return (uint16_t) 2;
+ if (!strcasecmp(name, "info"))
+ return (uint16_t) 3;
+ if (!strcasecmp(name, "verbose"))
+ return (uint16_t) 4;
+ if (!strcasecmp(name, "debug"))
+ return (uint16_t) 5;
+ if (!strcasecmp(name, "debug2"))
+ return (uint16_t) 6;
+ if (!strcasecmp(name, "debug3"))
+ return (uint16_t) 7;
+ if (!strcasecmp(name, "debug4"))
+ return (uint16_t) 8;
+ if (!strcasecmp(name, "debug5"))
+ return (uint16_t) 9;
+ if (!strcasecmp(name, "debug2"))
+ return (uint16_t) 6;
+
+ return (uint16_t) NO_VAL;
+}
+
/* Convert SelectTypeParameter to equivalent string
* NOTE: Not reentrant */
extern char *sched_param_type_string(uint16_t select_type_param)
@@ -1210,11 +1306,11 @@
else if (trig_type == TRIGGER_TYPE_PRI_CTLD_ACCT_FULL)
return "primary_slurmctld_acct_buffer_full";
else if (trig_type == TRIGGER_TYPE_BU_CTLD_FAIL)
- return "backup_ctld_failure";
+ return "backup_slurmctld_failure";
else if (trig_type == TRIGGER_TYPE_BU_CTLD_RES_OP)
- return "backup_ctld_resumed_operation";
+ return "backup_slurmctld_resumed_operation";
else if (trig_type == TRIGGER_TYPE_BU_CTLD_AS_CTRL)
- return "backup_ctld_assumed_control";
+ return "backup_slurmctld_assumed_control";
else if (trig_type == TRIGGER_TYPE_PRI_DBD_FAIL)
return "primary_slurmdbd_failure";
else if (trig_type == TRIGGER_TYPE_PRI_DBD_RES_OP)
@@ -1285,6 +1381,16 @@
xstrcat(flag_str, ",");
xstrcat(flag_str, "NO_LICENSE_ONLY");
}
+ if (flags & RESERVE_FLAG_STATIC) {
+ if (flag_str[0])
+ xstrcat(flag_str, ",");
+ xstrcat(flag_str, "STATIC");
+ }
+ if (flags & RESERVE_FLAG_NO_STATIC) {
+ if (flag_str[0])
+ xstrcat(flag_str, ",");
+ xstrcat(flag_str, "NO_STATIC");
+ }
return flag_str;
}
@@ -1303,7 +1409,8 @@
if (maint_flag) {
if (no_resp_flag)
return "MAINT*";
- return "MAINT";
+ if (base != NODE_STATE_ALLOCATED)
+ return "MAINT";
}
if (drain_flag) {
if (comp_flag || (base == NODE_STATE_ALLOCATED)) {
@@ -1420,7 +1527,8 @@
if (maint_flag) {
if (no_resp_flag)
return "MAINT*";
- return "MAINT";
+ if (inx != NODE_STATE_ALLOCATED)
+ return "MAINT";
}
if (drain_flag) {
if (comp_flag || (inx == NODE_STATE_ALLOCATED)) {
@@ -1726,6 +1834,7 @@
if (msg) {
select_g_select_jobinfo_free(msg->select_jobinfo);
msg->select_jobinfo = NULL;
+ xfree(msg->alias_list);
xfree(msg->node_list);
xfree(msg->cpus_per_node);
xfree(msg->cpu_count_reps);
@@ -2157,9 +2266,6 @@
xfree(block_info->mloaderimage);
xfree(block_info->mp_inx);
xfree(block_info->mp_str);
- xfree(block_info->mp_used_inx);
- xfree(block_info->mp_used_str);
- xfree(block_info->owner_name);
xfree(block_info->ramdiskimage);
xfree(block_info->reason);
}
@@ -2192,6 +2298,7 @@
{
xfree(msg);
}
+
extern void slurm_free_trigger_msg(trigger_info_msg_t *msg)
{
int i;
@@ -2247,6 +2354,13 @@
}
}
+
+inline void slurm_free_stats_info_request_msg(stats_info_request_msg_t *msg)
+{
+ xfree(msg);
+}
+
+
extern void slurm_destroy_priority_factors_object(void *object)
{
priority_factors_object_t *obj_ptr =
@@ -2472,6 +2586,9 @@
case REQUEST_TOPO_INFO:
/* No body to free */
break;
+ case REQUEST_REBOOT_NODES:
+ slurm_free_reboot_msg(data);
+ break;
case ACCOUNTING_UPDATE_MSG:
slurm_free_accounting_update_msg(data);
break;
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 3865555..4e3efbd 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -122,6 +122,8 @@
((_X->node_state & NODE_STATE_BASE) == NODE_STATE_FUTURE)
/* Derived node states */
+#define IS_NODE_CLOUD(_X) \
+ (_X->node_state & NODE_STATE_CLOUD)
#define IS_NODE_DRAIN(_X) \
(_X->node_state & NODE_STATE_DRAIN)
#define IS_NODE_DRAINING(_X) \
@@ -144,6 +146,8 @@
#define IS_NODE_MAINT(_X) \
(_X->node_state & NODE_STATE_MAINT)
+#define THIS_FILE ((strrchr(__FILE__, '/') ?: __FILE__ - 1) + 1)
+
/* used to define flags of the launch_tasks_request_msg_t and
* spawn task_request_msg_t task_flags
*/
@@ -175,6 +179,7 @@
REQUEST_TAKEOVER,
REQUEST_SET_SCHEDLOG_LEVEL,
REQUEST_SET_DEBUG_FLAGS,
+ REQUEST_REBOOT_NODES,
REQUEST_BUILD_INFO = 2001,
RESPONSE_BUILD_INFO,
@@ -210,6 +215,10 @@
RESPONSE_FRONT_END_INFO,
REQUEST_SPANK_ENVIRONMENT,
RESPONCE_SPANK_ENVIRONMENT,
+ REQUEST_STATS_INFO,
+ RESPONSE_STATS_INFO,
+ REQUEST_STATS_RESET,
+ RESPONSE_STATS_RESET,
REQUEST_UPDATE_JOB = 3001,
REQUEST_UPDATE_NODE,
@@ -275,7 +284,8 @@
RESPONSE_SLURMD_STATUS,
RESPONSE_SLURMCTLD_STATUS,
REQUEST_JOB_STEP_PIDS,
- RESPONSE_JOB_STEP_PIDS,
+ RESPONSE_JOB_STEP_PIDS,
+ REQUEST_FORWARD_DATA,
REQUEST_LAUNCH_TASKS = 6001,
RESPONSE_LAUNCH_TASKS,
@@ -303,6 +313,7 @@
SRUN_EXEC,
SRUN_STEP_MISSING,
SRUN_REQUEST_SUSPEND,
+ SRUN_STEP_SIGNAL, /* BluegeneQ: srun forwards signal to runjob */
PMI_KVS_PUT_REQ = 7201,
PMI_KVS_PUT_RESP,
@@ -409,6 +420,10 @@
uint32_t assoc_id; /* association ID */
char *cluster; /* cluster name */
+ uint64_t cpu_run_mins; /* currently running cpu-minutes
+ * = grp_used_cpu_run_secs / 60 */
+ uint64_t grp_cpu_mins; /* cpu-minute limit */
+
char *name; /* name */
char *parent; /* parent name */
@@ -449,7 +464,6 @@
typedef struct priority_factors_request_msg {
List job_id_list;
List uid_list;
- uid_t uid; /* used as a stop gap to verify auth DO NOT PACK */
} priority_factors_request_msg_t;
typedef struct priority_factors_response_msg {
@@ -556,6 +570,10 @@
switch_node_info_t *switch_nodeinfo;
} epilog_complete_msg_t;
+typedef struct reboot_msg {
+ char *node_list;
+} reboot_msg_t;
+
typedef struct shutdown_msg {
uint16_t options;
} shutdown_msg_t;
@@ -682,6 +700,7 @@
char **spank_job_env;
uint32_t spank_job_env_size;
dynamic_plugin_data_t *select_jobinfo; /* select context, opaque data */
+ char *alias_list; /* node name/address/hostname aliases */
} launch_tasks_request_msg_t;
typedef struct task_user_managed_io_msg {
@@ -765,6 +784,7 @@
uint16_t *cpus_per_node;/* cpus per node */
uint32_t *cpu_count_reps;/* how many nodes have same cpu count */
uint16_t cpus_per_task; /* number of CPUs requested per task */
+ char *alias_list; /* node name/address/hostname aliases */
char *nodes; /* list of nodes allocated to job_step */
char *script; /* the actual job script, default NONE */
char *std_err; /* pathname of stderr */
@@ -885,6 +905,12 @@
time_t start_time; /* time when job will start */
} will_run_response_msg_t;
+typedef struct forward_data_msg {
+ char *address;
+ uint32_t len;
+ char *data;
+} forward_data_msg_t;
+
/*****************************************************************************\
* Slurm API Message Types
\*****************************************************************************/
@@ -947,7 +973,7 @@
* values from the "src" slurm_msg_t structure.
* IN src - Pointer to the initialized message from which "dest" will
* be initialized.
- * OUT dest - Pointer to the slurm_msg_t which will be intialized.
+ * OUT dest - Pointer to the slurm_msg_t which will be initialized.
* NOTE: the "dest" structure will contain pointers into the contents of "src".
*/
extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src);
@@ -972,6 +998,8 @@
front_end_info_request_msg_t *msg);
extern void slurm_free_node_info_request_msg(node_info_request_msg_t *msg);
extern void slurm_free_part_info_request_msg(part_info_request_msg_t *msg);
+extern void slurm_free_stats_info_request_msg(stats_info_request_msg_t *msg);
+extern void slurm_free_stats_response_msg(stats_info_response_msg_t *msg);
extern void slurm_free_resv_info_request_msg(resv_info_request_msg_t *msg);
extern void slurm_free_set_debug_flags_msg(set_debug_flags_msg_t *msg);
extern void slurm_free_set_debug_level_msg(set_debug_level_msg_t *msg);
@@ -983,10 +1011,13 @@
priority_factors_request_msg_t *msg);
extern void slurm_free_priority_factors_response_msg(
priority_factors_response_msg_t *msg);
+extern void slurm_free_forward_data_msg(forward_data_msg_t *msg);
#define slurm_free_timelimit_msg(msg) \
slurm_free_kill_job_msg(msg)
+extern void slurm_free_reboot_msg(reboot_msg_t * msg);
+
extern void slurm_free_shutdown_msg(shutdown_msg_t * msg);
extern void slurm_free_job_desc_msg(job_desc_msg_t * msg);
@@ -1085,6 +1116,7 @@
extern void slurm_free_block_info_msg(block_info_msg_t *block_info_msg);
extern void slurm_free_block_info_request_msg(
block_info_request_msg_t *msg);
+
extern void slurm_free_job_notify_msg(job_notify_msg_t * msg);
extern void slurm_free_accounting_update_msg(accounting_update_msg_t *msg);
@@ -1097,6 +1129,9 @@
extern char *preempt_mode_string(uint16_t preempt_mode);
extern uint16_t preempt_mode_num(const char *preempt_mode);
+extern char *log_num2string(uint16_t inx);
+extern uint16_t log_string2num(char *name);
+
extern char *sched_param_type_string(uint16_t select_type_param);
extern char *job_reason_string(enum job_state_reason inx);
extern char *job_state_string(uint16_t inx);
diff --git a/src/common/slurm_protocol_mongo_common.h b/src/common/slurm_protocol_mongo_common.h
deleted file mode 100644
index 6990143..0000000
--- a/src/common/slurm_protocol_mongo_common.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*****************************************************************************\
- * slurm_protocol_mongo_common.h - alternative communications protocol to
- * TCP sockets. As of 11/18/2002 it is unclear that this communications
- * protocol will be fully developed.
- ****************************************************************************
- * Copyright (C) 2002 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Kevin Tew <tew1@llnl.gov>
- * CODE-OCEC-09-009. All rights reserved.
- *
- * This file is part of SLURM, a resource management program.
- * For details, see <http://www.schedmd.com/slurmdocs/>.
- * Please also read the included file: DISCLAIMER.
- *
- * SLURM is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * In addition, as a special exception, the copyright holders give permission
- * to link the code of portions of this program with the OpenSSL library under
- * certain conditions as described in each individual source file, and
- * distribute linked combinations including the two. You must obey the GNU
- * General Public License in all respects for all of the code used other than
- * OpenSSL. If you modify file(s) with this exception, you may extend this
- * exception to your version of the file(s), but you are not obligated to do
- * so. If you do not wish to do so, delete this exception statement from your
- * version. If you delete this exception statement from all source files in
- * the program, then also delete it here.
- *
- * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with SLURM; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-\*****************************************************************************/
-
-#ifndef _SLURM_PROTOCOL_MONGO_COMMON_H
-#define _SLURM_PROTOCOL_MONGO_COMMON_H
-
-#if HAVE_CONFIG_H
-# include "config.h"
-# if HAVE_INTTYPES_H
-# include <inttypes.h>
-# else
-# if HAVE_STDINT_H
-# include <stdint.h>
-# endif
-# endif /* HAVE_INTTYPES_H */
-#else /* !HAVE_CONFIG_H */
-# include <inttypes.h>
-#endif /* HAVE_CONFIG_H */
-
-#include <netinet/in.h>
-
-#define AF_SLURM AF_INET
-#define SLURM_INADDR_ANY 0x00000000
-
-/* LINUX SPECIFIC */
-/* this is the slurm equivalent of the operating system file descriptor,
- * which in linux is just an int */
-typedef uint32_t slurm_fd_t ;
-
-/* this is the slurm equivalent of the BSD sockets sockaddr */
-typedef struct mongo_addr_t slurm_addr_t ;
-/* this is the slurm equivalent of the BSD sockets fd_set */
-typedef fd_set slurm_fd_set ;
-/*struct kevin {
- int16_t family ;
- uint16_t port ;
- uint32_t address ;
- char pad[16 - sizeof ( int16_t ) -
- sizeof (uint16_t) - sizeof (uint32_t) ] ;
-} ;
-*/
-
-/* SLURM datatypes */
-/* this is a custom data type to describe the slurm msg type type
- * that is placed in the slurm protocol header
- * while just an short now, it may change in the future */
-/* Now defined in ../../src/common/slurm_protocol_defs.h
- * typedef uint16_t slurm_msg_type_t ;
- */
-
-#endif
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index a95c528..84bfab8 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -73,6 +73,7 @@
#define _pack_front_end_info_msg(msg,buf) _pack_buffer_msg(msg,buf)
#define _pack_node_info_msg(msg,buf) _pack_buffer_msg(msg,buf)
#define _pack_partition_info_msg(msg,buf) _pack_buffer_msg(msg,buf)
+#define _pack_stats_response_msg(msg,buf) _pack_buffer_msg(msg,buf)
#define _pack_reserve_info_msg(msg,buf) _pack_buffer_msg(msg,buf)
static void _pack_assoc_shares_object(void *in, Buf buffer,
@@ -275,6 +276,11 @@
launch_tasks_response_msg_t **msg_ptr, Buf buffer,
uint16_t protocol_version);
+static void _pack_reboot_msg(reboot_msg_t * msg, Buf buffer,
+ uint16_t protocol_version);
+static int _unpack_reboot_msg(reboot_msg_t ** msg_ptr, Buf buffer,
+ uint16_t protocol_version);
+
static void _pack_shutdown_msg(shutdown_msg_t * msg, Buf buffer,
uint16_t protocol_version);
static int _unpack_shutdown_msg(shutdown_msg_t ** msg_ptr, Buf buffer,
@@ -599,6 +605,19 @@
static int _unpack_spank_env_responce_msg(spank_env_responce_msg_t ** msg_ptr,
Buf buffer, uint16_t protocol_version);
+
+static void _pack_stats_request_msg(stats_info_request_msg_t *msg, Buf buffer,
+ uint16_t protocol_version);
+static int _unpack_stats_request_msg(stats_info_request_msg_t **msg_ptr,
+ Buf buffer, uint16_t protocol_version);
+static int _unpack_stats_response_msg(stats_info_response_msg_t **msg_ptr,
+ Buf buffer, uint16_t protocol_version);
+
+static void _pack_forward_data_msg(forward_data_msg_t *msg,
+ Buf buffer, uint16_t protocol_version);
+static int _unpack_forward_data_msg(forward_data_msg_t **msg_ptr,
+ Buf buffer, uint16_t protocol_version);
+
/* pack_header
* packs a slurm protocol header that precedes every slurm message
* IN header - the header structure to pack
@@ -757,6 +776,10 @@
case REQUEST_TOPO_INFO:
/* Message contains no body/information */
break;
+ case REQUEST_REBOOT_NODES:
+ _pack_reboot_msg((reboot_msg_t *)msg->data, buffer,
+ msg->protocol_version);
+ break;
case REQUEST_SHUTDOWN:
_pack_shutdown_msg((shutdown_msg_t *) msg->data, buffer,
msg->protocol_version);
@@ -871,6 +894,7 @@
msg->protocol_version);
break;
case REQUEST_CANCEL_JOB_STEP:
+ case SRUN_STEP_SIGNAL:
_pack_job_step_kill_msg((job_step_kill_msg_t *)
msg->data, buffer,
msg->protocol_version);
@@ -1159,6 +1183,21 @@
(spank_env_responce_msg_t *)msg->data, buffer,
msg->protocol_version);
break;
+
+ case REQUEST_STATS_INFO:
+ _pack_stats_request_msg((stats_info_request_msg_t *)msg->data,
+ buffer, msg->protocol_version);
+ break;
+
+ case RESPONSE_STATS_INFO:
+ _pack_stats_response_msg((slurm_msg_t *)msg, buffer);
+ break;
+
+ case REQUEST_FORWARD_DATA:
+ _pack_forward_data_msg((forward_data_msg_t *)msg->data,
+ buffer, msg->protocol_version);
+ break;
+
default:
debug("No pack method for msg type %u", msg->msg_type);
return EINVAL;
@@ -1181,6 +1220,17 @@
int rc = SLURM_SUCCESS;
msg->data = NULL; /* Initialize to no data for now */
+ /* In older versions of SLURM some RPC's were not initialized
+ correctly and NO_VAL was sent along as the
+ protocol_version. This was fixed in 2.4.2 and was noticed
+ when upgrading from 2.3 -> 2.5 when REQUEST_STEP_COMPLETE
+ was called. It hadn't changed for a while so
+ SLURM_2_4_PROTOCOL_VERSION should fix the issue when this
+ happens.
+ */
+ if (msg->protocol_version == (uint16_t)NO_VAL)
+ msg->protocol_version = SLURM_2_4_PROTOCOL_VERSION;
+
switch (msg->msg_type) {
case REQUEST_NODE_INFO:
rc = _unpack_node_info_request_msg((node_info_request_msg_t **)
@@ -1264,6 +1314,10 @@
case REQUEST_TOPO_INFO:
/* Message contains no body/information */
break;
+ case REQUEST_REBOOT_NODES:
+ rc = _unpack_reboot_msg((reboot_msg_t **) & (msg->data),
+ buffer, msg->protocol_version);
+ break;
case REQUEST_SHUTDOWN:
rc = _unpack_shutdown_msg((shutdown_msg_t **) & (msg->data),
buffer,
@@ -1388,6 +1442,7 @@
msg->protocol_version);
break;
case REQUEST_CANCEL_JOB_STEP:
+ case SRUN_STEP_SIGNAL:
rc = _unpack_job_step_kill_msg((job_step_kill_msg_t **)
& (msg->data), buffer,
msg->protocol_version);
@@ -1703,6 +1758,24 @@
(spank_env_responce_msg_t **)&msg->data, buffer,
msg->protocol_version);
break;
+
+ case REQUEST_STATS_INFO:
+ _unpack_stats_request_msg((stats_info_request_msg_t **)
+ &msg->data, buffer,
+ msg->protocol_version);
+ break;
+
+ case RESPONSE_STATS_INFO:
+ _unpack_stats_response_msg((stats_info_response_msg_t **)
+ &msg->data, buffer,
+ msg->protocol_version);
+ break;
+
+ case REQUEST_FORWARD_DATA:
+ rc = _unpack_forward_data_msg((forward_data_msg_t **)&msg->data,
+ buffer, msg->protocol_version);
+ break;
+
default:
debug("No unpack method for msg type %u", msg->msg_type);
return EINVAL;
@@ -1719,39 +1792,81 @@
{
association_shares_object_t *object = (association_shares_object_t *)in;
- if(!object) {
- pack32(0, buffer);
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ if(!object) {
+ pack32(0, buffer);
- packnull(buffer);
- packnull(buffer);
- packnull(buffer);
+ packnull(buffer);
+ packnull(buffer);
+ packnull(buffer);
- packdouble(0, buffer);
- pack32(0, buffer);
+ packdouble(0, buffer);
+ pack32(0, buffer);
- packdouble(0, buffer);
- packdouble(0, buffer);
- pack64(0, buffer);
+ packdouble(0, buffer);
+ packdouble(0, buffer);
+ pack64(0, buffer);
- pack16(0, buffer);
+ pack64(0, buffer);
+ pack64(0, buffer);
- return;
+ pack16(0, buffer);
+
+ return;
+ }
+
+ pack32(object->assoc_id, buffer);
+
+ packstr(object->cluster, buffer);
+ packstr(object->name, buffer);
+ packstr(object->parent, buffer);
+
+ packdouble(object->shares_norm, buffer);
+ pack32(object->shares_raw, buffer);
+
+ packdouble(object->usage_efctv, buffer);
+ packdouble(object->usage_norm, buffer);
+ pack64(object->usage_raw, buffer);
+
+ pack64(object->grp_cpu_mins, buffer);
+ pack64(object->cpu_run_mins, buffer);
+
+ pack16(object->user, buffer);
+ } else {
+ if(!object) {
+ pack32(0, buffer);
+
+ packnull(buffer);
+ packnull(buffer);
+ packnull(buffer);
+
+ packdouble(0, buffer);
+ pack32(0, buffer);
+
+ packdouble(0, buffer);
+ packdouble(0, buffer);
+ pack64(0, buffer);
+
+ pack16(0, buffer);
+
+ return;
+ }
+
+ pack32(object->assoc_id, buffer);
+
+ packstr(object->cluster, buffer);
+ packstr(object->name, buffer);
+ packstr(object->parent, buffer);
+
+ packdouble(object->shares_norm, buffer);
+ pack32(object->shares_raw, buffer);
+
+ packdouble(object->usage_efctv, buffer);
+ packdouble(object->usage_norm, buffer);
+ pack64(object->usage_raw, buffer);
+
+ pack16(object->user, buffer);
}
-
- pack32(object->assoc_id, buffer);
-
- packstr(object->cluster, buffer);
- packstr(object->name, buffer);
- packstr(object->parent, buffer);
-
- packdouble(object->shares_norm, buffer);
- pack32(object->shares_raw, buffer);
-
- packdouble(object->usage_efctv, buffer);
- packdouble(object->usage_norm, buffer);
- pack64(object->usage_raw, buffer);
-
- pack16(object->user, buffer);
}
static int _unpack_assoc_shares_object(void **object, Buf buffer,
@@ -1762,21 +1877,45 @@
xmalloc(sizeof(association_shares_object_t));
*object = (void *) object_ptr;
- safe_unpack32(&object_ptr->assoc_id, buffer);
- safe_unpackstr_xmalloc(&object_ptr->cluster, &uint32_tmp, buffer);
- safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer);
- safe_unpackstr_xmalloc(&object_ptr->parent, &uint32_tmp, buffer);
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpack32(&object_ptr->assoc_id, buffer);
- safe_unpackdouble(&object_ptr->shares_norm, buffer);
- safe_unpack32(&object_ptr->shares_raw, buffer);
+ safe_unpackstr_xmalloc(&object_ptr->cluster,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&object_ptr->parent,
+ &uint32_tmp, buffer);
- safe_unpackdouble(&object_ptr->usage_efctv, buffer);
- safe_unpackdouble(&object_ptr->usage_norm, buffer);
- safe_unpack64(&object_ptr->usage_raw, buffer);
+ safe_unpackdouble(&object_ptr->shares_norm, buffer);
+ safe_unpack32(&object_ptr->shares_raw, buffer);
- safe_unpack16(&object_ptr->user, buffer);
+ safe_unpackdouble(&object_ptr->usage_efctv, buffer);
+ safe_unpackdouble(&object_ptr->usage_norm, buffer);
+ safe_unpack64(&object_ptr->usage_raw, buffer);
+ safe_unpack64(&object_ptr->grp_cpu_mins, buffer);
+ safe_unpack64(&object_ptr->cpu_run_mins, buffer);
+
+ safe_unpack16(&object_ptr->user, buffer);
+ } else {
+ safe_unpack32(&object_ptr->assoc_id, buffer);
+
+ safe_unpackstr_xmalloc(&object_ptr->cluster,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&object_ptr->parent,
+ &uint32_tmp, buffer);
+
+ safe_unpackdouble(&object_ptr->shares_norm, buffer);
+ safe_unpack32(&object_ptr->shares_raw, buffer);
+
+ safe_unpackdouble(&object_ptr->usage_efctv, buffer);
+ safe_unpackdouble(&object_ptr->usage_norm, buffer);
+ safe_unpack64(&object_ptr->usage_raw, buffer);
+
+ safe_unpack16(&object_ptr->user, buffer);
+ }
return SLURM_SUCCESS;
unpack_error:
@@ -2164,7 +2303,17 @@
uint16_t protocol_version)
{
xassert(msg != NULL);
- if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ packstr(msg->node_addr, buffer);
+ packstr(msg->node_hostname, buffer);
+ packstr(msg->node_names, buffer);
+ pack16(msg->node_state, buffer);
+ packstr(msg->features, buffer);
+ packstr(msg->gres, buffer);
+ packstr(msg->reason, buffer);
+ pack32(msg->weight, buffer);
+ pack32(msg->reason_uid, buffer);
+ } else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
packstr(msg->node_names, buffer);
pack16(msg->node_state, buffer);
packstr(msg->features, buffer);
@@ -2193,7 +2342,20 @@
tmp_ptr = xmalloc(sizeof(update_node_msg_t));
*msg = tmp_ptr;
- if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpackstr_xmalloc(&tmp_ptr->node_addr,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->node_hostname,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->node_names,
+ &uint32_tmp, buffer);
+ safe_unpack16(&tmp_ptr->node_state, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->features, &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->gres, &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->reason, &uint32_tmp, buffer);
+ safe_unpack32(&tmp_ptr->weight, buffer);
+ safe_unpack32(&tmp_ptr->reason_uid, buffer);
+ } else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
safe_unpackstr_xmalloc(&tmp_ptr->node_names,
&uint32_tmp, buffer);
safe_unpack16(&tmp_ptr->node_state, buffer);
@@ -2416,6 +2578,7 @@
pack32(msg->error_code, buffer);
pack32(msg->job_id, buffer);
pack32(msg->pn_min_memory, buffer);
+ packstr(msg->alias_list, buffer);
packstr(msg->node_list, buffer);
pack32(msg->num_cpu_groups, buffer);
@@ -2444,32 +2607,62 @@
*msg = tmp_ptr;
/* load the data values */
- safe_unpack32(&tmp_ptr->error_code, buffer);
- safe_unpack32(&tmp_ptr->job_id, buffer);
- safe_unpack32(&tmp_ptr->pn_min_memory, buffer);
- safe_unpackstr_xmalloc(&tmp_ptr->node_list, &uint32_tmp, buffer);
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpack32(&tmp_ptr->error_code, buffer);
+ safe_unpack32(&tmp_ptr->job_id, buffer);
+ safe_unpack32(&tmp_ptr->pn_min_memory, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->alias_list, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->node_list, &uint32_tmp,
+ buffer);
- safe_unpack32(&tmp_ptr->num_cpu_groups, buffer);
- if (tmp_ptr->num_cpu_groups > 0) {
- safe_unpack16_array(&tmp_ptr->cpus_per_node, &uint32_tmp,
- buffer);
- if (tmp_ptr->num_cpu_groups != uint32_tmp)
- goto unpack_error;
- safe_unpack32_array(&tmp_ptr->cpu_count_reps, &uint32_tmp,
- buffer);
- if (tmp_ptr->num_cpu_groups != uint32_tmp)
+ safe_unpack32(&tmp_ptr->num_cpu_groups, buffer);
+ if (tmp_ptr->num_cpu_groups > 0) {
+ safe_unpack16_array(&tmp_ptr->cpus_per_node,
+ &uint32_tmp, buffer);
+ if (tmp_ptr->num_cpu_groups != uint32_tmp)
+ goto unpack_error;
+ safe_unpack32_array(&tmp_ptr->cpu_count_reps,
+ &uint32_tmp, buffer);
+ if (tmp_ptr->num_cpu_groups != uint32_tmp)
+ goto unpack_error;
+ } else {
+ tmp_ptr->cpus_per_node = NULL;
+ tmp_ptr->cpu_count_reps = NULL;
+ }
+
+ safe_unpack32(&tmp_ptr->node_cnt, buffer);
+ if (select_g_select_jobinfo_unpack(&tmp_ptr->select_jobinfo,
+ buffer, protocol_version))
goto unpack_error;
} else {
- tmp_ptr->cpus_per_node = NULL;
- tmp_ptr->cpu_count_reps = NULL;
+ safe_unpack32(&tmp_ptr->error_code, buffer);
+ safe_unpack32(&tmp_ptr->job_id, buffer);
+ safe_unpack32(&tmp_ptr->pn_min_memory, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->node_list, &uint32_tmp,
+ buffer);
+
+ safe_unpack32(&tmp_ptr->num_cpu_groups, buffer);
+ if (tmp_ptr->num_cpu_groups > 0) {
+ safe_unpack16_array(&tmp_ptr->cpus_per_node,
+ &uint32_tmp, buffer);
+ if (tmp_ptr->num_cpu_groups != uint32_tmp)
+ goto unpack_error;
+ safe_unpack32_array(&tmp_ptr->cpu_count_reps,
+ &uint32_tmp, buffer);
+ if (tmp_ptr->num_cpu_groups != uint32_tmp)
+ goto unpack_error;
+ } else {
+ tmp_ptr->cpus_per_node = NULL;
+ tmp_ptr->cpu_count_reps = NULL;
+ }
+
+ safe_unpack32(&tmp_ptr->node_cnt, buffer);
+ if (select_g_select_jobinfo_unpack(&tmp_ptr->select_jobinfo,
+ buffer, protocol_version))
+ goto unpack_error;
}
- safe_unpack32(&tmp_ptr->node_cnt, buffer);
-
- if (select_g_select_jobinfo_unpack(&tmp_ptr->select_jobinfo, buffer,
- protocol_version))
- goto unpack_error;
-
return SLURM_SUCCESS;
unpack_error:
@@ -2970,15 +3163,41 @@
_pack_update_resv_msg(resv_desc_msg_t * msg, Buf buffer,
uint16_t protocol_version)
{
+ uint32_t array_len;
xassert(msg != NULL);
- if(protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
packstr(msg->name, buffer);
pack_time(msg->start_time, buffer);
pack_time(msg->end_time, buffer);
pack32(msg->duration, buffer);
pack16(msg->flags, buffer);
- pack32(msg->node_cnt, buffer);
+ if (msg->node_cnt) {
+ for (array_len = 0; msg->node_cnt[array_len];
+ array_len++) {
+ /* determine array length */
+ }
+ array_len++; /* Include trailing zero */
+ } else
+ array_len = 0;
+ pack32_array(msg->node_cnt, array_len, buffer);
+ packstr(msg->node_list, buffer);
+ packstr(msg->features, buffer);
+ packstr(msg->licenses, buffer);
+ packstr(msg->partition, buffer);
+
+ packstr(msg->users, buffer);
+ packstr(msg->accounts, buffer);
+ } else if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
+ packstr(msg->name, buffer);
+ pack_time(msg->start_time, buffer);
+ pack_time(msg->end_time, buffer);
+ pack32(msg->duration, buffer);
+ pack16(msg->flags, buffer);
+ if (msg->node_cnt)
+ pack32(msg->node_cnt[0], buffer);
+ else
+ pack32(NO_VAL, buffer);
packstr(msg->node_list, buffer);
packstr(msg->features, buffer);
packstr(msg->licenses, buffer);
@@ -3002,13 +3221,46 @@
tmp_ptr = xmalloc(sizeof(resv_desc_msg_t));
*msg = tmp_ptr;
- if(protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
safe_unpackstr_xmalloc(&tmp_ptr->name, &uint32_tmp, buffer);
safe_unpack_time(&tmp_ptr->start_time, buffer);
safe_unpack_time(&tmp_ptr->end_time, buffer);
safe_unpack32(&tmp_ptr->duration, buffer);
safe_unpack16(&tmp_ptr->flags, buffer);
- safe_unpack32(&tmp_ptr->node_cnt, buffer);
+ safe_unpack32_array(&tmp_ptr->node_cnt, &uint32_tmp, buffer);
+ if (uint32_tmp > 0) {
+ /* Must be zero terminated */
+ if (tmp_ptr->node_cnt[uint32_tmp-1] != 0)
+ goto unpack_error;
+ } else {
+ /* This avoids a pointer to a zero length buffer */
+ xfree(tmp_ptr->node_cnt);
+ }
+ safe_unpackstr_xmalloc(&tmp_ptr->node_list,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->features,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->licenses,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->partition,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&tmp_ptr->users,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&tmp_ptr->accounts,
+ &uint32_tmp, buffer);
+ } else if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
+ uint32_t node_cnt;
+ safe_unpackstr_xmalloc(&tmp_ptr->name, &uint32_tmp, buffer);
+ safe_unpack_time(&tmp_ptr->start_time, buffer);
+ safe_unpack_time(&tmp_ptr->end_time, buffer);
+ safe_unpack32(&tmp_ptr->duration, buffer);
+ safe_unpack16(&tmp_ptr->flags, buffer);
+ safe_unpack32(&node_cnt, buffer);
+ if (node_cnt != NO_VAL) {
+ tmp_ptr->node_cnt = xmalloc(sizeof(uint32_t) * 2);
+ tmp_ptr->node_cnt[0] = node_cnt;
+ }
safe_unpackstr_xmalloc(&tmp_ptr->node_list,
&uint32_tmp, buffer);
safe_unpackstr_xmalloc(&tmp_ptr->features,
@@ -3023,6 +3275,7 @@
safe_unpackstr_xmalloc(&tmp_ptr->accounts,
&uint32_tmp, buffer);
}
+
return SLURM_SUCCESS;
unpack_error:
@@ -4286,7 +4539,207 @@
uint16_t uint16_tmp;
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ pack_time(build_ptr->last_update, buffer);
+
+ pack16(build_ptr->accounting_storage_enforce, buffer);
+ packstr(build_ptr->accounting_storage_backup_host, buffer);
+ packstr(build_ptr->accounting_storage_host, buffer);
+ packstr(build_ptr->accounting_storage_loc, buffer);
+ pack32(build_ptr->accounting_storage_port, buffer);
+ packstr(build_ptr->accounting_storage_type, buffer);
+ packstr(build_ptr->accounting_storage_user, buffer);
+ pack16(build_ptr->acctng_store_job_comment, buffer);
+
+ packstr(build_ptr->authtype, buffer);
+
+ packstr(build_ptr->backup_addr, buffer);
+ packstr(build_ptr->backup_controller, buffer);
+ pack16(build_ptr->batch_start_timeout, buffer);
+ pack_time(build_ptr->boot_time, buffer);
+
+ packstr(build_ptr->checkpoint_type, buffer);
+ packstr(build_ptr->cluster_name, buffer);
+ pack16(build_ptr->complete_wait, buffer);
+ packstr(build_ptr->control_addr, buffer);
+ packstr(build_ptr->control_machine, buffer);
+ packstr(build_ptr->crypto_type, buffer);
+
+ pack32(build_ptr->def_mem_per_cpu, buffer);
+ pack32(build_ptr->debug_flags, buffer);
+ pack16(build_ptr->disable_root_jobs, buffer);
+
+ pack16(build_ptr->enforce_part_limits, buffer);
+ packstr(build_ptr->epilog, buffer);
+ pack32(build_ptr->epilog_msg_time, buffer);
+ packstr(build_ptr->epilog_slurmctld, buffer);
+
+ pack16(build_ptr->fast_schedule, buffer);
+ pack32(build_ptr->first_job_id, buffer);
+
+ pack16(build_ptr->get_env_timeout, buffer);
+ packstr(build_ptr->gres_plugins, buffer);
+ pack16(build_ptr->group_info, buffer);
+
+ pack32(build_ptr->hash_val, buffer);
+
+ pack16(build_ptr->health_check_interval, buffer);
+ packstr(build_ptr->health_check_program, buffer);
+
+ pack16(build_ptr->inactive_limit, buffer);
+
+ pack16(build_ptr->job_acct_gather_freq, buffer);
+ packstr(build_ptr->job_acct_gather_type, buffer);
+
+ packstr(build_ptr->job_ckpt_dir, buffer);
+
+ packstr(build_ptr->job_comp_host, buffer);
+ packstr(build_ptr->job_comp_loc, buffer);
+ pack32((uint32_t)build_ptr->job_comp_port, buffer);
+ packstr(build_ptr->job_comp_type, buffer);
+ packstr(build_ptr->job_comp_user, buffer);
+
+ packstr(build_ptr->job_credential_private_key, buffer);
+ packstr(build_ptr->job_credential_public_certificate, buffer);
+ pack16(build_ptr->job_file_append, buffer);
+ pack16(build_ptr->job_requeue, buffer);
+ packstr(build_ptr->job_submit_plugins, buffer);
+
+ pack16(build_ptr->kill_on_bad_exit, buffer);
+ pack16(build_ptr->kill_wait, buffer);
+
+ packstr(build_ptr->licenses, buffer);
+
+ packstr(build_ptr->mail_prog, buffer);
+ pack32(build_ptr->max_job_cnt, buffer);
+ pack32(build_ptr->max_job_id, buffer);
+ pack32(build_ptr->max_mem_per_cpu, buffer);
+ pack32(build_ptr->max_step_cnt, buffer);
+ pack16(build_ptr->max_tasks_per_node, buffer);
+ pack16(build_ptr->min_job_age, buffer);
+ packstr(build_ptr->mpi_default, buffer);
+ packstr(build_ptr->mpi_params, buffer);
+ pack16(build_ptr->msg_timeout, buffer);
+
+ pack32(build_ptr->next_job_id, buffer);
+ packstr(build_ptr->node_prefix, buffer);
+
+ pack16(build_ptr->over_time_limit, buffer);
+
+ packstr(build_ptr->plugindir, buffer);
+ packstr(build_ptr->plugstack, buffer);
+ pack16(build_ptr->preempt_mode, buffer);
+ packstr(build_ptr->preempt_type, buffer);
+
+ pack32(build_ptr->priority_decay_hl, buffer);
+ pack32(build_ptr->priority_calc_period, buffer);
+ pack16(build_ptr->priority_favor_small, buffer);
+ pack16(build_ptr->priority_flags, buffer);
+ pack32(build_ptr->priority_max_age, buffer);
+ pack16(build_ptr->priority_reset_period, buffer);
+ packstr(build_ptr->priority_type, buffer);
+ pack32(build_ptr->priority_weight_age, buffer);
+ pack32(build_ptr->priority_weight_fs, buffer);
+ pack32(build_ptr->priority_weight_js, buffer);
+ pack32(build_ptr->priority_weight_part, buffer);
+ pack32(build_ptr->priority_weight_qos, buffer);
+
+ pack16(build_ptr->private_data, buffer);
+ packstr(build_ptr->proctrack_type, buffer);
+ packstr(build_ptr->prolog, buffer);
+ packstr(build_ptr->prolog_slurmctld, buffer);
+ pack16(build_ptr->propagate_prio_process, buffer);
+ packstr(build_ptr->propagate_rlimits, buffer);
+ packstr(build_ptr->propagate_rlimits_except, buffer);
+
+ packstr(build_ptr->reboot_program, buffer);
+ pack16(build_ptr->reconfig_flags, buffer);
+ packstr(build_ptr->resume_program, buffer);
+ pack16(build_ptr->resume_rate, buffer);
+ pack16(build_ptr->resume_timeout, buffer);
+ pack16(build_ptr->resv_over_run, buffer);
+ pack16(build_ptr->ret2service, buffer);
+
+ packstr(build_ptr->salloc_default_command, buffer);
+ packstr(build_ptr->sched_params, buffer);
+ pack16(build_ptr->schedport, buffer);
+ pack16(build_ptr->schedrootfltr, buffer);
+ packstr(build_ptr->sched_logfile, buffer);
+ pack16(build_ptr->sched_log_level, buffer);
+ pack16(build_ptr->sched_time_slice, buffer);
+ packstr(build_ptr->schedtype, buffer);
+ packstr(build_ptr->select_type, buffer);
+ if(build_ptr->select_conf_key_pairs)
+ count = list_count(build_ptr->select_conf_key_pairs);
+
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ ListIterator itr = list_iterator_create(
+ (List)build_ptr->select_conf_key_pairs);
+ config_key_pair_t *key_pair = NULL;
+ while ((key_pair = list_next(itr))) {
+ pack_config_key_pair(key_pair,
+ protocol_version, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ pack16(build_ptr->select_type_param, buffer);
+
+ packstr(build_ptr->slurm_conf, buffer);
+ pack32(build_ptr->slurm_user_id, buffer);
+ packstr(build_ptr->slurm_user_name, buffer);
+ pack32(build_ptr->slurmd_user_id, buffer);
+ packstr(build_ptr->slurmd_user_name, buffer);
+
+ pack16(build_ptr->slurmctld_debug, buffer);
+ packstr(build_ptr->slurmctld_logfile, buffer);
+ packstr(build_ptr->slurmctld_pidfile, buffer);
+ pack32(build_ptr->slurmctld_port, buffer);
+ pack16(build_ptr->slurmctld_port_count, buffer);
+ pack16(build_ptr->slurmctld_timeout, buffer);
+
+ pack16(build_ptr->slurmd_debug, buffer);
+ packstr(build_ptr->slurmd_logfile, buffer);
+ packstr(build_ptr->slurmd_pidfile, buffer);
+ if (!(cluster_flags & CLUSTER_FLAG_MULTSD))
+ pack32(build_ptr->slurmd_port, buffer);
+
+ packstr(build_ptr->slurmd_spooldir, buffer);
+ pack16(build_ptr->slurmd_timeout, buffer);
+ packstr(build_ptr->srun_epilog, buffer);
+ packstr(build_ptr->srun_prolog, buffer);
+ packstr(build_ptr->state_save_location, buffer);
+ packstr(build_ptr->suspend_exc_nodes, buffer);
+ packstr(build_ptr->suspend_exc_parts, buffer);
+ packstr(build_ptr->suspend_program, buffer);
+ pack16(build_ptr->suspend_rate, buffer);
+ pack32(build_ptr->suspend_time, buffer);
+ pack16(build_ptr->suspend_timeout, buffer);
+ packstr(build_ptr->switch_type, buffer);
+
+ packstr(build_ptr->task_epilog, buffer);
+ packstr(build_ptr->task_prolog, buffer);
+ packstr(build_ptr->task_plugin, buffer);
+ pack16(build_ptr->task_plugin_param, buffer);
+ packstr(build_ptr->tmp_fs, buffer);
+ packstr(build_ptr->topology_plugin, buffer);
+ pack16(build_ptr->track_wckey, buffer);
+ pack16(build_ptr->tree_width, buffer);
+
+ pack16(build_ptr->use_pam, buffer);
+ packstr(build_ptr->unkillable_program, buffer);
+ pack16(build_ptr->unkillable_timeout, buffer);
+ packstr(build_ptr->version, buffer);
+ pack16(build_ptr->vsize_factor, buffer);
+
+ pack16(build_ptr->wait_time, buffer);
+
+ pack16(build_ptr->z_16, buffer);
+ pack32(build_ptr->z_32, buffer);
+ packstr(build_ptr->z_char, buffer);
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
pack_time(build_ptr->last_update, buffer);
pack16(build_ptr->accounting_storage_enforce, buffer);
@@ -4890,7 +5343,281 @@
build_ptr->hash_val = NO_VAL;
/* load the data values */
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ /* unpack timestamp of snapshot */
+ safe_unpack_time(&build_ptr->last_update, buffer);
+
+ safe_unpack16(&build_ptr->accounting_storage_enforce, buffer);
+ safe_unpackstr_xmalloc(
+ &build_ptr->accounting_storage_backup_host,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->accounting_storage_host,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->accounting_storage_loc,
+ &uint32_tmp, buffer);
+ safe_unpack32(&build_ptr->accounting_storage_port, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->accounting_storage_type,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->accounting_storage_user,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->acctng_store_job_comment, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->authtype,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->backup_addr,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->backup_controller,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->batch_start_timeout, buffer);
+ safe_unpack_time(&build_ptr->boot_time, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->checkpoint_type,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->cluster_name,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->complete_wait, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->control_addr,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->control_machine,
+ &uint32_tmp,buffer);
+ safe_unpackstr_xmalloc(&build_ptr->crypto_type, &uint32_tmp,
+ buffer);
+
+ safe_unpack32(&build_ptr->def_mem_per_cpu, buffer);
+ safe_unpack32(&build_ptr->debug_flags, buffer);
+ safe_unpack16(&build_ptr->disable_root_jobs, buffer);
+
+ safe_unpack16(&build_ptr->enforce_part_limits, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->epilog, &uint32_tmp,
+ buffer);
+ safe_unpack32(&build_ptr->epilog_msg_time, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->epilog_slurmctld,
+ &uint32_tmp, buffer);
+
+ safe_unpack16(&build_ptr->fast_schedule, buffer);
+ safe_unpack32(&build_ptr->first_job_id, buffer);
+
+ safe_unpack16(&build_ptr->get_env_timeout, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->gres_plugins,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->group_info, buffer);
+
+ safe_unpack32(&build_ptr->hash_val, buffer);
+
+ safe_unpack16(&build_ptr->health_check_interval, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->health_check_program,
+ &uint32_tmp, buffer);
+
+ safe_unpack16(&build_ptr->inactive_limit, buffer);
+
+ safe_unpack16(&build_ptr->job_acct_gather_freq, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_type,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->job_ckpt_dir,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->job_comp_host,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->job_comp_loc,
+ &uint32_tmp, buffer);
+ safe_unpack32(&build_ptr->job_comp_port, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->job_comp_type,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->job_comp_user,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->job_credential_private_key,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->
+ job_credential_public_certificate,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->job_file_append, buffer);
+ safe_unpack16(&build_ptr->job_requeue, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->job_submit_plugins,
+ &uint32_tmp, buffer);
+
+ safe_unpack16(&build_ptr->kill_on_bad_exit, buffer);
+ safe_unpack16(&build_ptr->kill_wait, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->licenses,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->mail_prog,
+ &uint32_tmp, buffer);
+ safe_unpack32(&build_ptr->max_job_cnt, buffer);
+ safe_unpack32(&build_ptr->max_job_id, buffer);
+ safe_unpack32(&build_ptr->max_mem_per_cpu, buffer);
+ safe_unpack32(&build_ptr->max_step_cnt, buffer);
+ safe_unpack16(&build_ptr->max_tasks_per_node, buffer);
+ safe_unpack16(&build_ptr->min_job_age, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->mpi_default,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->mpi_params,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->msg_timeout, buffer);
+
+ safe_unpack32(&build_ptr->next_job_id, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->node_prefix,
+ &uint32_tmp, buffer);
+
+ safe_unpack16(&build_ptr->over_time_limit, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->plugindir,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->plugstack,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->preempt_mode, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->preempt_type,
+ &uint32_tmp, buffer);
+
+ safe_unpack32(&build_ptr->priority_decay_hl, buffer);
+ safe_unpack32(&build_ptr->priority_calc_period, buffer);
+ safe_unpack16(&build_ptr->priority_favor_small, buffer);
+ safe_unpack16(&build_ptr->priority_flags, buffer);
+ safe_unpack32(&build_ptr->priority_max_age, buffer);
+ safe_unpack16(&build_ptr->priority_reset_period, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->priority_type, &uint32_tmp,
+ buffer);
+ safe_unpack32(&build_ptr->priority_weight_age, buffer);
+ safe_unpack32(&build_ptr->priority_weight_fs, buffer);
+ safe_unpack32(&build_ptr->priority_weight_js, buffer);
+ safe_unpack32(&build_ptr->priority_weight_part, buffer);
+ safe_unpack32(&build_ptr->priority_weight_qos, buffer);
+
+ safe_unpack16(&build_ptr->private_data, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->proctrack_type, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&build_ptr->prolog, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&build_ptr->prolog_slurmctld,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->propagate_prio_process, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->propagate_rlimits,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->propagate_rlimits_except,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->reboot_program, &uint32_tmp,
+ buffer);
+ safe_unpack16(&build_ptr->reconfig_flags, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->resume_program,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->resume_rate, buffer);
+ safe_unpack16(&build_ptr->resume_timeout, buffer);
+ safe_unpack16(&build_ptr->resv_over_run, buffer);
+ safe_unpack16(&build_ptr->ret2service, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->salloc_default_command,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->sched_params,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->schedport, buffer);
+ safe_unpack16(&build_ptr->schedrootfltr, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->sched_logfile,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->sched_log_level, buffer);
+ safe_unpack16(&build_ptr->sched_time_slice, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->schedtype,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->select_type,
+ &uint32_tmp, buffer);
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ List tmp_list = list_create(destroy_config_key_pair);
+ config_key_pair_t *object = NULL;
+ int i;
+ for (i=0; i<count; i++) {
+ if (unpack_config_key_pair(
+ (void *)&object, protocol_version,
+ buffer)
+ == SLURM_ERROR)
+ goto unpack_error;
+ list_append(tmp_list, object);
+ }
+ build_ptr->select_conf_key_pairs = (void *)tmp_list;
+ }
+
+ safe_unpack16(&build_ptr->select_type_param, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->slurm_conf,
+ &uint32_tmp, buffer);
+ safe_unpack32(&build_ptr->slurm_user_id, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->slurm_user_name,
+ &uint32_tmp, buffer);
+ safe_unpack32(&build_ptr->slurmd_user_id, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->slurmd_user_name,
+ &uint32_tmp, buffer);
+
+ safe_unpack16(&build_ptr->slurmctld_debug, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->slurmctld_logfile,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->slurmctld_pidfile,
+ &uint32_tmp, buffer);
+ safe_unpack32(&build_ptr->slurmctld_port, buffer);
+ safe_unpack16(&build_ptr->slurmctld_port_count, buffer);
+ safe_unpack16(&build_ptr->slurmctld_timeout, buffer);
+
+ safe_unpack16(&build_ptr->slurmd_debug, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->slurmd_logfile, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&build_ptr->slurmd_pidfile, &uint32_tmp,
+ buffer);
+ if (!(cluster_flags & CLUSTER_FLAG_MULTSD))
+ safe_unpack32(&build_ptr->slurmd_port, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->slurmd_spooldir,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->slurmd_timeout, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->srun_epilog,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->srun_prolog,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->state_save_location,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->suspend_exc_nodes,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->suspend_exc_parts,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->suspend_program,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->suspend_rate, buffer);
+ safe_unpack32(&build_ptr->suspend_time, buffer);
+ safe_unpack16(&build_ptr->suspend_timeout, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->switch_type,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&build_ptr->task_epilog,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->task_prolog,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->task_plugin,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->task_plugin_param, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->tmp_fs, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&build_ptr->topology_plugin,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->track_wckey, buffer);
+ safe_unpack16(&build_ptr->tree_width, buffer);
+
+ safe_unpack16(&build_ptr->use_pam, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->unkillable_program,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->unkillable_timeout, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->version,
+ &uint32_tmp, buffer);
+ safe_unpack16(&build_ptr->vsize_factor, buffer);
+
+ safe_unpack16(&build_ptr->wait_time, buffer);
+
+ safe_unpack16(&build_ptr->z_16, buffer);
+ safe_unpack32(&build_ptr->z_32, buffer);
+ safe_unpackstr_xmalloc(&build_ptr->z_char, &uint32_tmp,
+ buffer);
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
/* unpack timestamp of snapshot */
safe_unpack_time(&build_ptr->last_update, buffer);
@@ -6840,9 +7567,81 @@
uint16_t protocol_version)
{
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
- int i=0;
+ int i = 0;
xassert(msg != NULL);
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ pack32(msg->job_id, buffer);
+ pack32(msg->job_step_id, buffer);
+ pack32(msg->ntasks, buffer);
+ pack32(msg->uid, buffer);
+ pack32(msg->gid, buffer);
+ pack32(msg->job_mem_lim, buffer);
+ pack32(msg->step_mem_lim, buffer);
+
+ pack32(msg->nnodes, buffer);
+ pack16(msg->cpus_per_task, buffer);
+ pack16(msg->task_dist, buffer);
+
+ slurm_cred_pack(msg->cred, buffer);
+ for (i = 0; i < msg->nnodes; i++) {
+ pack16(msg->tasks_to_launch[i], buffer);
+ pack16(msg->cpus_allocated[i], buffer);
+ pack32_array(msg->global_task_ids[i],
+ (uint32_t) msg->tasks_to_launch[i],
+ buffer);
+ }
+ pack16(msg->num_resp_port, buffer);
+ for (i = 0; i < msg->num_resp_port; i++)
+ pack16(msg->resp_port[i], buffer);
+ slurm_pack_slurm_addr(&msg->orig_addr, buffer);
+ packstr_array(msg->env, msg->envc, buffer);
+ packstr_array(msg->spank_job_env, msg->spank_job_env_size,
+ buffer);
+ packstr(msg->cwd, buffer);
+ pack16(msg->cpu_bind_type, buffer);
+ packstr(msg->cpu_bind, buffer);
+ pack16(msg->mem_bind_type, buffer);
+ packstr(msg->mem_bind, buffer);
+ packstr_array(msg->argv, msg->argc, buffer);
+ pack16(msg->task_flags, buffer);
+ pack16(msg->multi_prog, buffer);
+ pack16(msg->user_managed_io, buffer);
+ if (msg->user_managed_io == 0) {
+ packstr(msg->ofname, buffer);
+ packstr(msg->efname, buffer);
+ packstr(msg->ifname, buffer);
+ pack8(msg->buffered_stdio, buffer);
+ pack8(msg->labelio, buffer);
+ pack16(msg->num_io_port, buffer);
+ for (i = 0; i < msg->num_io_port; i++)
+ pack16(msg->io_port[i], buffer);
+ }
+ packstr(msg->task_prolog, buffer);
+ packstr(msg->task_epilog, buffer);
+ pack16(msg->slurmd_debug, buffer);
+ switch_pack_jobinfo(msg->switch_job, buffer);
+ job_options_pack(msg->options, buffer);
+ packstr(msg->alias_list, buffer);
+ packstr(msg->complete_nodelist, buffer);
+
+ pack8(msg->open_mode, buffer);
+ pack8(msg->pty, buffer);
+ pack16(msg->acctg_freq, buffer);
+ packstr(msg->ckpt_dir, buffer);
+ packstr(msg->restart_dir, buffer);
+ if (!(cluster_flags & CLUSTER_FLAG_BG)) {
+			/* If on a Blue Gene cluster do not send this to the
+			 * slurmstepd, it will overwrite the environment that
+			 * is already set up correctly for both the job and the
+			 * step. The slurmstepd treats this select_jobinfo as
+			 * if it were for the job instead of for the step.
+			 */
+ select_g_select_jobinfo_pack(msg->select_jobinfo,
+ buffer,
+ protocol_version);
+ }
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
pack32(msg->job_id, buffer);
pack32(msg->job_step_id, buffer);
pack32(msg->ntasks, buffer);
@@ -7041,13 +7840,108 @@
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
uint32_t uint32_tmp;
launch_tasks_request_msg_t *msg;
- int i=0;
+ int i = 0;
xassert(msg_ptr != NULL);
msg = xmalloc(sizeof(launch_tasks_request_msg_t));
*msg_ptr = msg;
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpack32(&msg->job_id, buffer);
+ safe_unpack32(&msg->job_step_id, buffer);
+ safe_unpack32(&msg->ntasks, buffer);
+ safe_unpack32(&msg->uid, buffer);
+ safe_unpack32(&msg->gid, buffer);
+ safe_unpack32(&msg->job_mem_lim, buffer);
+ safe_unpack32(&msg->step_mem_lim, buffer);
+
+ safe_unpack32(&msg->nnodes, buffer);
+ safe_unpack16(&msg->cpus_per_task, buffer);
+ safe_unpack16(&msg->task_dist, buffer);
+
+ if (!(msg->cred = slurm_cred_unpack(buffer, protocol_version)))
+ goto unpack_error;
+ msg->tasks_to_launch = xmalloc(sizeof(uint16_t) * msg->nnodes);
+ msg->cpus_allocated = xmalloc(sizeof(uint16_t) * msg->nnodes);
+ msg->global_task_ids = xmalloc(sizeof(uint32_t *) *
+ msg->nnodes);
+ for (i = 0; i < msg->nnodes; i++) {
+ safe_unpack16(&msg->tasks_to_launch[i], buffer);
+ safe_unpack16(&msg->cpus_allocated[i], buffer);
+ safe_unpack32_array(&msg->global_task_ids[i],
+ &uint32_tmp,
+ buffer);
+ if (msg->tasks_to_launch[i] != (uint16_t) uint32_tmp)
+ goto unpack_error;
+ }
+ safe_unpack16(&msg->num_resp_port, buffer);
+ if (msg->num_resp_port > 0) {
+ msg->resp_port = xmalloc(sizeof(uint16_t) *
+ msg->num_resp_port);
+ for (i = 0; i < msg->num_resp_port; i++)
+ safe_unpack16(&msg->resp_port[i], buffer);
+ }
+ slurm_unpack_slurm_addr_no_alloc(&msg->orig_addr, buffer);
+ safe_unpackstr_array(&msg->env, &msg->envc, buffer);
+ safe_unpackstr_array(&msg->spank_job_env,
+ &msg->spank_job_env_size, buffer);
+ safe_unpackstr_xmalloc(&msg->cwd, &uint32_tmp, buffer);
+ safe_unpack16(&msg->cpu_bind_type, buffer);
+ safe_unpackstr_xmalloc(&msg->cpu_bind, &uint32_tmp, buffer);
+ safe_unpack16(&msg->mem_bind_type, buffer);
+ safe_unpackstr_xmalloc(&msg->mem_bind, &uint32_tmp, buffer);
+ safe_unpackstr_array(&msg->argv, &msg->argc, buffer);
+ safe_unpack16(&msg->task_flags, buffer);
+ safe_unpack16(&msg->multi_prog, buffer);
+ safe_unpack16(&msg->user_managed_io, buffer);
+ if (msg->user_managed_io == 0) {
+ safe_unpackstr_xmalloc(&msg->ofname, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&msg->efname, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&msg->ifname, &uint32_tmp,
+ buffer);
+ safe_unpack8(&msg->buffered_stdio, buffer);
+ safe_unpack8(&msg->labelio, buffer);
+ safe_unpack16(&msg->num_io_port, buffer);
+ if (msg->num_io_port > 0) {
+ msg->io_port = xmalloc(sizeof(uint16_t) *
+ msg->num_io_port);
+ for (i = 0; i < msg->num_io_port; i++)
+ safe_unpack16(&msg->io_port[i],
+ buffer);
+ }
+ }
+ safe_unpackstr_xmalloc(&msg->task_prolog, &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&msg->task_epilog, &uint32_tmp, buffer);
+ safe_unpack16(&msg->slurmd_debug, buffer);
+
+ switch_alloc_jobinfo(&msg->switch_job);
+ if (switch_unpack_jobinfo(msg->switch_job, buffer) < 0) {
+ error("switch_unpack_jobinfo: %m");
+ switch_free_jobinfo(msg->switch_job);
+ goto unpack_error;
+ }
+ msg->options = job_options_create();
+ if (job_options_unpack(msg->options, buffer) < 0) {
+ error("Unable to unpack extra job options: %m");
+ goto unpack_error;
+ }
+ safe_unpackstr_xmalloc(&msg->alias_list, &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&msg->complete_nodelist, &uint32_tmp,
+ buffer);
+
+ safe_unpack8(&msg->open_mode, buffer);
+ safe_unpack8(&msg->pty, buffer);
+ safe_unpack16(&msg->acctg_freq, buffer);
+ safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer);
+ if (!(cluster_flags & CLUSTER_FLAG_BG)) {
+ select_g_select_jobinfo_unpack(&msg->select_jobinfo,
+ buffer,
+ protocol_version);
+ }
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
safe_unpack32(&msg->job_id, buffer);
safe_unpack32(&msg->job_step_id, buffer);
safe_unpack32(&msg->ntasks, buffer);
@@ -7427,6 +8321,35 @@
}
static void
+_pack_reboot_msg(reboot_msg_t * msg, Buf buffer,
+ uint16_t protocol_version)
+{
+ if (msg && msg->node_list)
+ packstr(msg->node_list, buffer);
+ else
+ packnull(buffer);
+}
+
+static int
+_unpack_reboot_msg(reboot_msg_t ** msg_ptr, Buf buffer,
+ uint16_t protocol_version)
+{
+ reboot_msg_t *msg;
+ uint32_t uint32_tmp;
+
+ msg = xmalloc(sizeof(reboot_msg_t));
+ *msg_ptr = msg;
+
+ safe_unpackstr_xmalloc(&msg->node_list, &uint32_tmp, buffer);
+ return SLURM_SUCCESS;
+
+unpack_error:
+ slurm_free_reboot_msg(msg);
+ *msg_ptr = NULL;
+ return SLURM_ERROR;
+}
+
+static void
_pack_shutdown_msg(shutdown_msg_t * msg, Buf buffer,
uint16_t protocol_version)
{
@@ -7849,7 +8772,82 @@
ListIterator itr;
block_job_info_t *job;
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ if (!block_info) {
+ packnull(buffer);
+ packnull(buffer);
+ packnull(buffer);
+
+ pack32(1, buffer);
+ pack16((uint16_t)NO_VAL, buffer);
+
+ packnull(buffer);
+ packnull(buffer);
+
+ pack32(NO_VAL, buffer);
+
+ packnull(buffer);
+ packnull(buffer);
+ packnull(buffer);
+ packnull(buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack16((uint16_t)NO_VAL, buffer);
+ packnull(buffer);
+ packnull(buffer);
+ pack16((uint16_t)NO_VAL, buffer);
+ packnull(buffer);
+ return;
+ }
+
+ packstr(block_info->bg_block_id, buffer);
+ packstr(block_info->blrtsimage, buffer);
+
+ if (block_info->mp_inx) {
+ char *bitfmt = inx2bitfmt(block_info->mp_inx);
+ packstr(bitfmt, buffer);
+ xfree(bitfmt);
+ } else
+ packnull(buffer);
+
+ pack32(cluster_dims, buffer);
+ for (dim = 0; dim < cluster_dims; dim++)
+ pack16(block_info->conn_type[dim], buffer);
+
+ packstr(block_info->ionode_str, buffer);
+
+ if (block_info->ionode_inx) {
+ char *bitfmt =
+ inx2bitfmt(block_info->ionode_inx);
+ packstr(bitfmt, buffer);
+ xfree(bitfmt);
+ } else
+ packnull(buffer);
+
+ if (block_info->job_list)
+ count = list_count(block_info->job_list);
+
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(block_info->job_list);
+ while ((job = list_next(itr))) {
+ slurm_pack_block_job_info(job, buffer,
+ protocol_version);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ packstr(block_info->linuximage, buffer);
+ packstr(block_info->mloaderimage, buffer);
+ packstr(block_info->mp_str, buffer);
+ pack32(block_info->cnode_cnt, buffer);
+ pack32(block_info->cnode_err_cnt, buffer);
+ pack16(block_info->node_use, buffer);
+ packstr(block_info->ramdiskimage, buffer);
+ packstr(block_info->reason, buffer);
+ pack16(block_info->state, buffer);
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
if (!block_info) {
packnull(buffer);
packnull(buffer);
@@ -7916,24 +8914,19 @@
}
count = NO_VAL;
- pack32(block_info->job_running, buffer);
+ pack32(NO_VAL, buffer); /* for job_running */
packstr(block_info->linuximage, buffer);
packstr(block_info->mloaderimage, buffer);
packstr(block_info->mp_str, buffer);
- packstr(block_info->mp_used_str, buffer);
+ packnull(buffer); /* for mp_used_str */
pack32(block_info->cnode_cnt, buffer);
pack16(block_info->node_use, buffer);
- packstr(block_info->owner_name, buffer);
+		packnull(buffer); /* for user_name */
packstr(block_info->ramdiskimage, buffer);
packstr(block_info->reason, buffer);
pack16(block_info->state, buffer);
- if (block_info->mp_used_inx) {
- char *bitfmt = inx2bitfmt(block_info->mp_used_inx);
- packstr(bitfmt, buffer);
- xfree(bitfmt);
- } else
- packnull(buffer);
+ packnull(buffer); /* for mp_used_inx */
} else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
if (!block_info) {
packnull(buffer);
@@ -7984,7 +8977,7 @@
} else
packnull(buffer);
- pack32(block_info->job_running, buffer);
+ pack32(NO_VAL, buffer); /* for job_running */
packstr(block_info->linuximage, buffer);
packstr(block_info->mloaderimage, buffer);
@@ -7992,7 +8985,7 @@
pack32(block_info->cnode_cnt, buffer);
if(cluster_flags & CLUSTER_FLAG_BGL)
pack16(block_info->node_use, buffer);
- packstr(block_info->owner_name, buffer);
+ packnull(buffer); /* for user_name */
packstr(block_info->ramdiskimage, buffer);
packstr(block_info->reason, buffer);
pack16(block_info->state, buffer);
@@ -8044,7 +9037,7 @@
} else
packnull(buffer);
- pack32(block_info->job_running, buffer);
+ pack32(NO_VAL, buffer); /* for job_running */
packstr(block_info->linuximage, buffer);
packstr(block_info->mloaderimage, buffer);
@@ -8052,7 +9045,7 @@
pack32(block_info->cnode_cnt, buffer);
if(cluster_flags & CLUSTER_FLAG_BGL)
pack16(block_info->node_use, buffer);
- packstr(block_info->owner_name, buffer);
+ packnull(buffer); /* for user_name */
packstr(block_info->ramdiskimage, buffer);
pack16(block_info->state, buffer);
}
@@ -8094,7 +9087,7 @@
memset(block_info, 0, sizeof(block_info_t));
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
safe_unpackstr_xmalloc(&block_info->bg_block_id,
&uint32_tmp, buffer);
safe_unpackstr_xmalloc(&block_info->blrtsimage,
@@ -8138,31 +9131,93 @@
}
}
- safe_unpack32(&block_info->job_running, buffer);
safe_unpackstr_xmalloc(&block_info->linuximage,
&uint32_tmp, buffer);
safe_unpackstr_xmalloc(&block_info->mloaderimage,
&uint32_tmp, buffer);
safe_unpackstr_xmalloc(&(block_info->mp_str), &uint32_tmp,
buffer);
- safe_unpackstr_xmalloc(&(block_info->mp_used_str), &uint32_tmp,
+ safe_unpack32(&block_info->cnode_cnt, buffer);
+ safe_unpack32(&block_info->cnode_err_cnt, buffer);
+ safe_unpack16(&block_info->node_use, buffer);
+ safe_unpackstr_xmalloc(&block_info->ramdiskimage,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&block_info->reason,
+ &uint32_tmp, buffer);
+ safe_unpack16(&block_info->state, buffer);
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ safe_unpackstr_xmalloc(&block_info->bg_block_id,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&block_info->blrtsimage,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&mp_inx_str, &uint32_tmp, buffer);
+ if (mp_inx_str == NULL) {
+ block_info->mp_inx = bitfmt2int("");
+ } else {
+ block_info->mp_inx = bitfmt2int(mp_inx_str);
+ xfree(mp_inx_str);
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count > HIGHEST_DIMENSIONS) {
+ error("slurm_unpack_block_info_members: count of "
+ "system is %d but we can only handle %d",
+ count, HIGHEST_DIMENSIONS);
+ goto unpack_error;
+ }
+ for (i=0; i<count; i++)
+ safe_unpack16(&block_info->conn_type[i], buffer);
+ safe_unpackstr_xmalloc(&(block_info->ionode_str),
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&mp_inx_str, &uint32_tmp, buffer);
+ if (mp_inx_str == NULL) {
+ block_info->ionode_inx = bitfmt2int("");
+ } else {
+ block_info->ionode_inx = bitfmt2int(mp_inx_str);
+ xfree(mp_inx_str);
+ }
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ block_info->job_list =
+ list_create(slurm_free_block_job_info);
+ for (i=0; i<count; i++) {
+ if (_unpack_block_job_info(&job, buffer,
+ protocol_version)
+ == SLURM_ERROR)
+ goto unpack_error;
+ list_append(block_info->job_list, job);
+ }
+ }
+
+ safe_unpack32(&uint32_tmp, buffer);
+ if (uint32_tmp != (uint32_t)NO_JOB_RUNNING) {
+ block_job_info_t *job =
+				xmalloc(sizeof(block_job_info_t));
+ if (!block_info->job_list)
+ block_info->job_list =
+ list_create(slurm_free_block_job_info);
+ job->job_id = uint32_tmp;
+ list_append(block_info->job_list, job);
+ }
+ safe_unpackstr_xmalloc(&block_info->linuximage,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&block_info->mloaderimage,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&(block_info->mp_str), &uint32_tmp,
buffer);
+ safe_unpackstr_xmalloc(&mp_inx_str, &uint32_tmp, buffer);
+ xfree(mp_inx_str);
safe_unpack32(&block_info->cnode_cnt, buffer);
safe_unpack16(&block_info->node_use, buffer);
- safe_unpackstr_xmalloc(&block_info->owner_name,
- &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&mp_inx_str, &uint32_tmp, buffer);
+ xfree(mp_inx_str);
safe_unpackstr_xmalloc(&block_info->ramdiskimage,
&uint32_tmp, buffer);
safe_unpackstr_xmalloc(&block_info->reason,
&uint32_tmp, buffer);
safe_unpack16(&block_info->state, buffer);
safe_unpackstr_xmalloc(&mp_inx_str, &uint32_tmp, buffer);
- if (mp_inx_str == NULL) {
- block_info->mp_used_inx = bitfmt2int("");
- } else {
- block_info->mp_used_inx = bitfmt2int(mp_inx_str);
- xfree(mp_inx_str);
- }
+ xfree(mp_inx_str);
} else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
safe_unpackstr_xmalloc(&block_info->bg_block_id,
&uint32_tmp, buffer);
@@ -8186,7 +9241,16 @@
block_info->ionode_inx = bitfmt2int(mp_inx_str);
xfree(mp_inx_str);
}
- safe_unpack32(&block_info->job_running, buffer);
+ safe_unpack32(&uint32_tmp, buffer);
+ if (uint32_tmp != (uint32_t)NO_JOB_RUNNING) {
+ block_job_info_t *job =
+				xmalloc(sizeof(block_job_info_t));
+ if (!block_info->job_list)
+ block_info->job_list =
+ list_create(slurm_free_block_job_info);
+ job->job_id = uint32_tmp;
+ list_append(block_info->job_list, job);
+ }
safe_unpackstr_xmalloc(&block_info->linuximage,
&uint32_tmp, buffer);
safe_unpackstr_xmalloc(&block_info->mloaderimage,
@@ -8196,8 +9260,8 @@
safe_unpack32(&block_info->cnode_cnt, buffer);
if (cluster_flags & CLUSTER_FLAG_BGL)
safe_unpack16(&block_info->node_use, buffer);
- safe_unpackstr_xmalloc(&block_info->owner_name,
- &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&mp_inx_str, &uint32_tmp, buffer);
+ xfree(mp_inx_str);
safe_unpackstr_xmalloc(&block_info->ramdiskimage,
&uint32_tmp, buffer);
safe_unpackstr_xmalloc(&block_info->reason,
@@ -8226,7 +9290,16 @@
block_info->ionode_inx = bitfmt2int(mp_inx_str);
xfree(mp_inx_str);
}
- safe_unpack32(&block_info->job_running, buffer);
+ safe_unpack32(&uint32_tmp, buffer);
+ if (uint32_tmp != (uint32_t)NO_JOB_RUNNING) {
+ block_job_info_t *job =
+				xmalloc(sizeof(block_job_info_t));
+ if (!block_info->job_list)
+ block_info->job_list =
+ list_create(slurm_free_block_job_info);
+ job->job_id = uint32_tmp;
+ list_append(block_info->job_list, job);
+ }
safe_unpackstr_xmalloc(&block_info->linuximage,
&uint32_tmp, buffer);
safe_unpackstr_xmalloc(&block_info->mloaderimage,
@@ -8236,8 +9309,8 @@
safe_unpack32(&block_info->cnode_cnt, buffer);
if(cluster_flags & CLUSTER_FLAG_BGL)
safe_unpack16(&block_info->node_use, buffer);
- safe_unpackstr_xmalloc(&block_info->owner_name,
- &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&mp_inx_str, &uint32_tmp, buffer);
+ xfree(mp_inx_str);
safe_unpackstr_xmalloc(&block_info->ramdiskimage,
&uint32_tmp, buffer);
safe_unpack16(&block_info->state, buffer);
@@ -8577,51 +9650,106 @@
{
xassert(msg != NULL);
- pack32(msg->job_id, buffer);
- pack32(msg->step_id, buffer);
- pack32(msg->uid, buffer);
- pack32(msg->gid, buffer);
- pack32(msg->ntasks, buffer);
- pack32(msg->pn_min_memory, buffer);
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ pack32(msg->job_id, buffer);
+ pack32(msg->step_id, buffer);
+ pack32(msg->uid, buffer);
+ pack32(msg->gid, buffer);
+ pack32(msg->ntasks, buffer);
+ pack32(msg->pn_min_memory, buffer);
- pack8(msg->open_mode, buffer);
- pack8(msg->overcommit, buffer);
+ pack8(msg->open_mode, buffer);
+ pack8(msg->overcommit, buffer);
- pack16(msg->acctg_freq, buffer);
- pack16(msg->cpu_bind_type, buffer);
- pack16(msg->cpus_per_task, buffer);
- pack16(msg->restart_cnt, buffer);
+ pack16(msg->acctg_freq, buffer);
+ pack16(msg->cpu_bind_type, buffer);
+ pack16(msg->cpus_per_task, buffer);
+ pack16(msg->restart_cnt, buffer);
- pack32(msg->num_cpu_groups, buffer);
- if (msg->num_cpu_groups) {
- pack16_array(msg->cpus_per_node, msg->num_cpu_groups, buffer);
- pack32_array(msg->cpu_count_reps, msg->num_cpu_groups, buffer);
+ pack32(msg->num_cpu_groups, buffer);
+ if (msg->num_cpu_groups) {
+ pack16_array(msg->cpus_per_node, msg->num_cpu_groups,
+ buffer);
+ pack32_array(msg->cpu_count_reps, msg->num_cpu_groups,
+ buffer);
+ }
+
+ packstr(msg->alias_list, buffer);
+ packstr(msg->cpu_bind, buffer);
+ packstr(msg->nodes, buffer);
+ packstr(msg->script, buffer);
+ packstr(msg->work_dir, buffer);
+ packstr(msg->ckpt_dir, buffer);
+ packstr(msg->restart_dir, buffer);
+
+ packstr(msg->std_err, buffer);
+ packstr(msg->std_in, buffer);
+ packstr(msg->std_out, buffer);
+
+ pack32(msg->argc, buffer);
+ packstr_array(msg->argv, msg->argc, buffer);
+ packstr_array(msg->spank_job_env, msg->spank_job_env_size,
+ buffer);
+
+ pack32(msg->envc, buffer);
+ packstr_array(msg->environment, msg->envc, buffer);
+
+ pack32(msg->job_mem, buffer);
+
+ slurm_cred_pack(msg->cred, buffer);
+
+ select_g_select_jobinfo_pack(msg->select_jobinfo, buffer,
+ protocol_version);
+ } else {
+ pack32(msg->job_id, buffer);
+ pack32(msg->step_id, buffer);
+ pack32(msg->uid, buffer);
+ pack32(msg->gid, buffer);
+ pack32(msg->ntasks, buffer);
+ pack32(msg->pn_min_memory, buffer);
+
+ pack8(msg->open_mode, buffer);
+ pack8(msg->overcommit, buffer);
+
+ pack16(msg->acctg_freq, buffer);
+ pack16(msg->cpu_bind_type, buffer);
+ pack16(msg->cpus_per_task, buffer);
+ pack16(msg->restart_cnt, buffer);
+
+ pack32(msg->num_cpu_groups, buffer);
+ if (msg->num_cpu_groups) {
+ pack16_array(msg->cpus_per_node, msg->num_cpu_groups,
+ buffer);
+ pack32_array(msg->cpu_count_reps, msg->num_cpu_groups,
+ buffer);
+ }
+
+ packstr(msg->cpu_bind, buffer);
+ packstr(msg->nodes, buffer);
+ packstr(msg->script, buffer);
+ packstr(msg->work_dir, buffer);
+ packstr(msg->ckpt_dir, buffer);
+ packstr(msg->restart_dir, buffer);
+
+ packstr(msg->std_err, buffer);
+ packstr(msg->std_in, buffer);
+ packstr(msg->std_out, buffer);
+
+ pack32(msg->argc, buffer);
+ packstr_array(msg->argv, msg->argc, buffer);
+ packstr_array(msg->spank_job_env, msg->spank_job_env_size,
+ buffer);
+
+ pack32(msg->envc, buffer);
+ packstr_array(msg->environment, msg->envc, buffer);
+
+ pack32(msg->job_mem, buffer);
+
+ slurm_cred_pack(msg->cred, buffer);
+
+ select_g_select_jobinfo_pack(msg->select_jobinfo, buffer,
+ protocol_version);
}
-
- packstr(msg->cpu_bind, buffer);
- packstr(msg->nodes, buffer);
- packstr(msg->script, buffer);
- packstr(msg->work_dir, buffer);
- packstr(msg->ckpt_dir, buffer);
- packstr(msg->restart_dir, buffer);
-
- packstr(msg->std_err, buffer);
- packstr(msg->std_in, buffer);
- packstr(msg->std_out, buffer);
-
- pack32(msg->argc, buffer);
- packstr_array(msg->argv, msg->argc, buffer);
- packstr_array(msg->spank_job_env, msg->spank_job_env_size, buffer);
-
- pack32(msg->envc, buffer);
- packstr_array(msg->environment, msg->envc, buffer);
-
- pack32(msg->job_mem, buffer);
-
- slurm_cred_pack(msg->cred, buffer);
-
- select_g_select_jobinfo_pack(msg->select_jobinfo, buffer,
- protocol_version);
}
static int
@@ -8635,65 +9763,149 @@
launch_msg_ptr = xmalloc(sizeof(batch_job_launch_msg_t));
*msg = launch_msg_ptr;
- safe_unpack32(&launch_msg_ptr->job_id, buffer);
- safe_unpack32(&launch_msg_ptr->step_id, buffer);
- safe_unpack32(&launch_msg_ptr->uid, buffer);
- safe_unpack32(&launch_msg_ptr->gid, buffer);
- safe_unpack32(&launch_msg_ptr->ntasks, buffer);
- safe_unpack32(&launch_msg_ptr->pn_min_memory, buffer);
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpack32(&launch_msg_ptr->job_id, buffer);
+ safe_unpack32(&launch_msg_ptr->step_id, buffer);
+ safe_unpack32(&launch_msg_ptr->uid, buffer);
+ safe_unpack32(&launch_msg_ptr->gid, buffer);
+ safe_unpack32(&launch_msg_ptr->ntasks, buffer);
+ safe_unpack32(&launch_msg_ptr->pn_min_memory, buffer);
- safe_unpack8(&launch_msg_ptr->open_mode, buffer);
- safe_unpack8(&launch_msg_ptr->overcommit, buffer);
+ safe_unpack8(&launch_msg_ptr->open_mode, buffer);
+ safe_unpack8(&launch_msg_ptr->overcommit, buffer);
- safe_unpack16(&launch_msg_ptr->acctg_freq, buffer);
- safe_unpack16(&launch_msg_ptr->cpu_bind_type, buffer);
- safe_unpack16(&launch_msg_ptr->cpus_per_task, buffer);
- safe_unpack16(&launch_msg_ptr->restart_cnt, buffer);
+ safe_unpack16(&launch_msg_ptr->acctg_freq, buffer);
+ safe_unpack16(&launch_msg_ptr->cpu_bind_type, buffer);
+ safe_unpack16(&launch_msg_ptr->cpus_per_task, buffer);
+ safe_unpack16(&launch_msg_ptr->restart_cnt, buffer);
- safe_unpack32(&launch_msg_ptr->num_cpu_groups, buffer);
- if (launch_msg_ptr->num_cpu_groups) {
- safe_unpack16_array(&(launch_msg_ptr->cpus_per_node),
- &uint32_tmp, buffer);
- if (launch_msg_ptr->num_cpu_groups != uint32_tmp)
+ safe_unpack32(&launch_msg_ptr->num_cpu_groups, buffer);
+ if (launch_msg_ptr->num_cpu_groups) {
+ safe_unpack16_array(&(launch_msg_ptr->cpus_per_node),
+ &uint32_tmp, buffer);
+ if (launch_msg_ptr->num_cpu_groups != uint32_tmp)
+ goto unpack_error;
+ safe_unpack32_array(&(launch_msg_ptr->cpu_count_reps),
+ &uint32_tmp, buffer);
+ if (launch_msg_ptr->num_cpu_groups != uint32_tmp)
+ goto unpack_error;
+ }
+
+
+ safe_unpackstr_xmalloc(&launch_msg_ptr->alias_list,
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->cpu_bind, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->nodes, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->script, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->work_dir, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->ckpt_dir, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->restart_dir,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&launch_msg_ptr->std_err, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->std_in, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->std_out, &uint32_tmp,
+ buffer);
+
+ safe_unpack32(&launch_msg_ptr->argc, buffer);
+ safe_unpackstr_array(&launch_msg_ptr->argv,
+ &launch_msg_ptr->argc, buffer);
+ safe_unpackstr_array(&launch_msg_ptr->spank_job_env,
+ &launch_msg_ptr->spank_job_env_size,
+ buffer);
+
+ safe_unpack32(&launch_msg_ptr->envc, buffer);
+ safe_unpackstr_array(&launch_msg_ptr->environment,
+ &launch_msg_ptr->envc, buffer);
+
+ safe_unpack32(&launch_msg_ptr->job_mem, buffer);
+
+ if (!(launch_msg_ptr->cred = slurm_cred_unpack(buffer,
+ protocol_version)))
goto unpack_error;
- safe_unpack32_array(&(launch_msg_ptr->cpu_count_reps),
- &uint32_tmp, buffer);
- if (launch_msg_ptr->num_cpu_groups != uint32_tmp)
+
+ if (select_g_select_jobinfo_unpack(&launch_msg_ptr->
+ select_jobinfo,
+ buffer, protocol_version))
+ goto unpack_error;
+ } else {
+ safe_unpack32(&launch_msg_ptr->job_id, buffer);
+ safe_unpack32(&launch_msg_ptr->step_id, buffer);
+ safe_unpack32(&launch_msg_ptr->uid, buffer);
+ safe_unpack32(&launch_msg_ptr->gid, buffer);
+ safe_unpack32(&launch_msg_ptr->ntasks, buffer);
+ safe_unpack32(&launch_msg_ptr->pn_min_memory, buffer);
+
+ safe_unpack8(&launch_msg_ptr->open_mode, buffer);
+ safe_unpack8(&launch_msg_ptr->overcommit, buffer);
+
+ safe_unpack16(&launch_msg_ptr->acctg_freq, buffer);
+ safe_unpack16(&launch_msg_ptr->cpu_bind_type, buffer);
+ safe_unpack16(&launch_msg_ptr->cpus_per_task, buffer);
+ safe_unpack16(&launch_msg_ptr->restart_cnt, buffer);
+
+ safe_unpack32(&launch_msg_ptr->num_cpu_groups, buffer);
+ if (launch_msg_ptr->num_cpu_groups) {
+ safe_unpack16_array(&(launch_msg_ptr->cpus_per_node),
+ &uint32_tmp, buffer);
+ if (launch_msg_ptr->num_cpu_groups != uint32_tmp)
+ goto unpack_error;
+ safe_unpack32_array(&(launch_msg_ptr->cpu_count_reps),
+ &uint32_tmp, buffer);
+ if (launch_msg_ptr->num_cpu_groups != uint32_tmp)
+ goto unpack_error;
+ }
+
+ safe_unpackstr_xmalloc(&launch_msg_ptr->cpu_bind, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->nodes, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->script, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->work_dir, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->ckpt_dir, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->restart_dir,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&launch_msg_ptr->std_err, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->std_in, &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&launch_msg_ptr->std_out, &uint32_tmp,
+ buffer);
+
+ safe_unpack32(&launch_msg_ptr->argc, buffer);
+ safe_unpackstr_array(&launch_msg_ptr->argv,
+ &launch_msg_ptr->argc, buffer);
+ safe_unpackstr_array(&launch_msg_ptr->spank_job_env,
+ &launch_msg_ptr->spank_job_env_size,
+ buffer);
+
+ safe_unpack32(&launch_msg_ptr->envc, buffer);
+ safe_unpackstr_array(&launch_msg_ptr->environment,
+ &launch_msg_ptr->envc, buffer);
+
+ safe_unpack32(&launch_msg_ptr->job_mem, buffer);
+
+ if (!(launch_msg_ptr->cred = slurm_cred_unpack(buffer,
+ protocol_version)))
+ goto unpack_error;
+
+ if (select_g_select_jobinfo_unpack(&launch_msg_ptr->
+ select_jobinfo,
+ buffer, protocol_version))
goto unpack_error;
}
- safe_unpackstr_xmalloc(&launch_msg_ptr->cpu_bind, &uint32_tmp, buffer);
- safe_unpackstr_xmalloc(&launch_msg_ptr->nodes, &uint32_tmp, buffer);
- safe_unpackstr_xmalloc(&launch_msg_ptr->script, &uint32_tmp, buffer);
- safe_unpackstr_xmalloc(&launch_msg_ptr->work_dir, &uint32_tmp, buffer);
- safe_unpackstr_xmalloc(&launch_msg_ptr->ckpt_dir, &uint32_tmp, buffer);
- safe_unpackstr_xmalloc(&launch_msg_ptr->restart_dir,
- &uint32_tmp, buffer);
-
- safe_unpackstr_xmalloc(&launch_msg_ptr->std_err, &uint32_tmp, buffer);
- safe_unpackstr_xmalloc(&launch_msg_ptr->std_in, &uint32_tmp, buffer);
- safe_unpackstr_xmalloc(&launch_msg_ptr->std_out, &uint32_tmp, buffer);
-
- safe_unpack32(&launch_msg_ptr->argc, buffer);
- safe_unpackstr_array(&launch_msg_ptr->argv,
- &launch_msg_ptr->argc, buffer);
- safe_unpackstr_array(&launch_msg_ptr->spank_job_env,
- &launch_msg_ptr->spank_job_env_size, buffer);
-
- safe_unpack32(&launch_msg_ptr->envc, buffer);
- safe_unpackstr_array(&launch_msg_ptr->environment,
- &launch_msg_ptr->envc, buffer);
-
- safe_unpack32(&launch_msg_ptr->job_mem, buffer);
-
- if (!(launch_msg_ptr->cred = slurm_cred_unpack(buffer,
- protocol_version)))
- goto unpack_error;
-
- if (select_g_select_jobinfo_unpack(&launch_msg_ptr->select_jobinfo,
- buffer, protocol_version))
- goto unpack_error;
-
return SLURM_SUCCESS;
unpack_error:
@@ -9015,6 +10227,35 @@
return SLURM_ERROR;
}
+static void _pack_forward_data_msg(forward_data_msg_t *msg,
+ Buf buffer, uint16_t protocol_version)
+{
+ xassert (msg != NULL);
+ packstr(msg->address, buffer);
+ pack32(msg->len, buffer);
+ packmem(msg->data, msg->len, buffer);
+}
+
+static int _unpack_forward_data_msg(forward_data_msg_t **msg_ptr,
+ Buf buffer, uint16_t protocol_version)
+{
+ forward_data_msg_t *msg;
+ uint32_t temp32;
+
+ xassert (msg_ptr != NULL);
+ msg = xmalloc(sizeof(forward_data_msg_t));
+ *msg_ptr = msg;
+ safe_unpackstr_xmalloc(&msg->address, &temp32, buffer);
+ safe_unpack32(&msg->len, buffer);
+ safe_unpackmem_xmalloc(&msg->data, &temp32, buffer);
+
+ return SLURM_SUCCESS;
+
+unpack_error:
+ slurm_free_forward_data_msg(msg);
+ *msg_ptr = NULL;
+ return SLURM_ERROR;
+}
static void
_pack_checkpoint_msg(checkpoint_msg_t *msg, Buf buffer,
@@ -9232,7 +10473,19 @@
int i;
uint16_t uint16_tmp;
- if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ pack32(msg->record_count, buffer);
+ for (i=0; i<msg->record_count; i++) {
+ pack16 (msg->trigger_array[i].flags, buffer);
+ pack32 (msg->trigger_array[i].trig_id, buffer);
+ pack16 (msg->trigger_array[i].res_type, buffer);
+ packstr(msg->trigger_array[i].res_id, buffer);
+ pack32 (msg->trigger_array[i].trig_type, buffer);
+ pack16 (msg->trigger_array[i].offset, buffer);
+ pack32 (msg->trigger_array[i].user_id, buffer);
+ packstr(msg->trigger_array[i].program, buffer);
+ }
+ } else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
pack32(msg->record_count, buffer);
for (i=0; i<msg->record_count; i++) {
pack32 (msg->trigger_array[i].trig_id, buffer);
@@ -9266,7 +10519,23 @@
uint32_t uint32_tmp;
trigger_info_msg_t *msg = xmalloc(sizeof(trigger_info_msg_t));
- if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpack32 (&msg->record_count, buffer);
+ msg->trigger_array = xmalloc(sizeof(trigger_info_t) *
+ msg->record_count);
+ for (i=0; i<msg->record_count; i++) {
+ safe_unpack16(&msg->trigger_array[i].flags, buffer);
+ safe_unpack32(&msg->trigger_array[i].trig_id, buffer);
+ safe_unpack16(&msg->trigger_array[i].res_type, buffer);
+ safe_unpackstr_xmalloc(&msg->trigger_array[i].res_id,
+ &uint32_tmp, buffer);
+ safe_unpack32(&msg->trigger_array[i].trig_type, buffer);
+ safe_unpack16(&msg->trigger_array[i].offset, buffer);
+ safe_unpack32(&msg->trigger_array[i].user_id, buffer);
+ safe_unpackstr_xmalloc(&msg->trigger_array[i].program,
+ &uint32_tmp, buffer);
+ }
+ } else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
safe_unpack32 (&msg->record_count, buffer);
msg->trigger_array = xmalloc(sizeof(trigger_info_t) *
msg->record_count);
@@ -9949,6 +11218,92 @@
return SLURM_ERROR;
}
+static void _pack_stats_request_msg(stats_info_request_msg_t *msg, Buf buffer,
+ uint16_t protocol_version)
+{
+ xassert ( msg != NULL );
+
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION)
+ pack16(msg->command_id, buffer);
+}
+
+static int _unpack_stats_request_msg(stats_info_request_msg_t **msg_ptr,
+ Buf buffer, uint16_t protocol_version)
+{
+ stats_info_request_msg_t * msg;
+ xassert ( msg_ptr != NULL );
+
+ msg = xmalloc ( sizeof(stats_info_request_msg_t) );
+ *msg_ptr = msg ;
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION)
+ safe_unpack16(&msg->command_id, buffer);
+ else
+ goto unpack_error;
+ return SLURM_SUCCESS;
+
+unpack_error:
+ info("SIM: unpack_stats_request_msg error");
+ *msg_ptr = NULL;
+ slurm_free_stats_info_request_msg(msg);
+ return SLURM_ERROR;
+}
+
+static int _unpack_stats_response_msg(stats_info_response_msg_t **msg_ptr,
+ Buf buffer, uint16_t protocol_version)
+{
+ stats_info_response_msg_t * msg;
+ xassert ( msg_ptr != NULL );
+
+ msg = xmalloc ( sizeof (stats_info_response_msg_t) );
+ *msg_ptr = msg ;
+
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpack32(&msg->parts_packed, buffer);
+ if (msg->parts_packed) {
+ safe_unpack_time(&msg->req_time, buffer);
+ safe_unpack_time(&msg->req_time_start, buffer);
+ safe_unpack32(&msg->server_thread_count,buffer);
+ safe_unpack32(&msg->agent_queue_size, buffer);
+ safe_unpack32(&msg->jobs_submitted, buffer);
+ safe_unpack32(&msg->jobs_started, buffer);
+ safe_unpack32(&msg->jobs_completed, buffer);
+ safe_unpack32(&msg->jobs_canceled, buffer);
+ safe_unpack32(&msg->jobs_failed, buffer);
+
+ safe_unpack32(&msg->schedule_cycle_max, buffer);
+ safe_unpack32(&msg->schedule_cycle_last,buffer);
+ safe_unpack32(&msg->schedule_cycle_sum, buffer);
+ safe_unpack32(&msg->schedule_cycle_counter, buffer);
+ safe_unpack32(&msg->schedule_cycle_depth, buffer);
+ safe_unpack32(&msg->schedule_queue_len, buffer);
+
+ safe_unpack32(&msg->bf_backfilled_jobs, buffer);
+ safe_unpack32(&msg->bf_last_backfilled_jobs, buffer);
+ safe_unpack32(&msg->bf_cycle_counter, buffer);
+ safe_unpack32(&msg->bf_cycle_sum, buffer);
+ safe_unpack32(&msg->bf_cycle_last, buffer);
+ safe_unpack32(&msg->bf_last_depth, buffer);
+ safe_unpack32(&msg->bf_last_depth_try, buffer);
+
+ safe_unpack32(&msg->bf_queue_len, buffer);
+ safe_unpack32(&msg->bf_cycle_max, buffer);
+ safe_unpack_time(&msg->bf_when_last_cycle, buffer);
+ safe_unpack32(&msg->bf_depth_sum, buffer);
+ safe_unpack32(&msg->bf_depth_try_sum, buffer);
+ safe_unpack32(&msg->bf_queue_len_sum, buffer);
+ safe_unpack32(&msg->bf_active, buffer);
+ }
+ } else
+ goto unpack_error;
+
+ return SLURM_SUCCESS;
+
+unpack_error:
+ info("SIM: unpack_stats_response_msg error");
+ *msg_ptr = NULL;
+ slurm_free_stats_response_msg(msg);
+ return SLURM_ERROR;
+}
/* template
void pack_ ( * msg , Buf buffer )
diff --git a/src/common/slurm_protocol_util.c b/src/common/slurm_protocol_util.c
index 2d03617..6846db9 100644
--- a/src/common/slurm_protocol_util.c
+++ b/src/common/slurm_protocol_util.c
@@ -52,8 +52,10 @@
{
uint16_t version;
- if (rpc_version >= 9)
+ if (rpc_version >= 10)
version = SLURM_PROTOCOL_VERSION;
+ else if (rpc_version >= 9)
+ version = SLURM_2_3_PROTOCOL_VERSION;
else if (rpc_version >= 8)
version = SLURM_2_2_PROTOCOL_VERSION;
else if (rpc_version >= 6)
@@ -82,6 +84,7 @@
if (slurmdbd_conf) {
if ((header->version != SLURM_PROTOCOL_VERSION) &&
+ (header->version != SLURM_2_3_PROTOCOL_VERSION) &&
(header->version != SLURM_2_2_PROTOCOL_VERSION) &&
(header->version != SLURM_2_1_PROTOCOL_VERSION))
slurm_seterrno_ret(SLURM_PROTOCOL_VERSION_ERROR);
@@ -116,6 +119,7 @@
case REQUEST_PARTITION_INFO:
case REQUEST_PING:
case REQUEST_PRIORITY_FACTORS:
+ case REQUEST_REBOOT_NODES:
case REQUEST_RECONFIGURE:
case REQUEST_RESERVATION_INFO:
case REQUEST_SET_DEBUG_FLAGS:
@@ -143,10 +147,12 @@
case REQUEST_UPDATE_NODE:
case REQUEST_UPDATE_PARTITION:
case REQUEST_UPDATE_RESERVATION:
- if ((header->version == SLURM_2_2_PROTOCOL_VERSION)
+ if ((header->version == SLURM_2_3_PROTOCOL_VERSION)
+ || (header->version == SLURM_2_2_PROTOCOL_VERSION)
|| (header->version == SLURM_2_1_PROTOCOL_VERSION))
break;
default:
+ debug("unsupported RPC %d", header->msg_type);
slurm_seterrno_ret(SLURM_PROTOCOL_VERSION_ERROR);
break;
}
@@ -170,15 +176,17 @@
if (msg->protocol_version != (uint16_t)NO_VAL)
header->version = msg->protocol_version;
else if (working_cluster_rec)
- header->version = _get_slurm_version(
+ msg->protocol_version = header->version = _get_slurm_version(
working_cluster_rec->rpc_version);
else if ((msg->msg_type == ACCOUNTING_UPDATE_MSG) ||
(msg->msg_type == ACCOUNTING_FIRST_REG)) {
uint32_t rpc_version =
((accounting_update_msg_t *)msg->data)->rpc_version;
- header->version = _get_slurm_version(rpc_version);
+ msg->protocol_version = header->version =
+ _get_slurm_version(rpc_version);
} else
- header->version = SLURM_PROTOCOL_VERSION;
+ msg->protocol_version = header->version =
+ SLURM_PROTOCOL_VERSION;
header->flags = flags;
header->msg_type = msg->msg_type;
diff --git a/src/common/slurm_step_layout.c b/src/common/slurm_step_layout.c
index ffc0e2a..dc07fda 100644
--- a/src/common/slurm_step_layout.c
+++ b/src/common/slurm_step_layout.c
@@ -283,7 +283,7 @@
extern void pack_slurm_step_layout(slurm_step_layout_t *step_layout,
Buf buffer, uint16_t protocol_version)
{
- uint16_t i = 0;
+ uint32_t i = 0;
if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
if (step_layout)
@@ -650,15 +650,14 @@
int i, j, taskid = 0;
bool over_subscribe = false;
- for (i=0; i<step_layout->node_cnt; i++) {
- step_layout->tids[i] = xmalloc(sizeof(uint32_t)
- * step_layout->task_cnt);
- }
for (j=0; taskid<step_layout->task_cnt; j++) { /* cycle counter */
bool space_remaining = false;
for (i=0; ((i<step_layout->node_cnt)
&& (taskid<step_layout->task_cnt)); i++) {
if ((j<cpus[i]) || over_subscribe) {
+ xrealloc(step_layout->tids[i], sizeof(uint32_t)
+ * (step_layout->tasks[i] + 1));
+
step_layout->tids[i][step_layout->tasks[i]] =
taskid;
taskid++;
diff --git a/src/common/slurm_topology.c b/src/common/slurm_topology.c
index a6ea794..3229fa2 100644
--- a/src/common/slurm_topology.c
+++ b/src/common/slurm_topology.c
@@ -75,7 +75,7 @@
static slurm_topo_context_t *g_topo_context = NULL;
static pthread_mutex_t g_topo_context_lock = PTHREAD_MUTEX_INITIALIZER;
-
+static bool init_run = false;
/* ************************************************************************ */
/* TAG( slurm_topo_get_ops ) */
@@ -207,6 +207,9 @@
int retval = SLURM_SUCCESS;
char *topo_type = NULL;
+ if ( init_run && g_topo_context )
+ return retval;
+
slurm_mutex_lock( &g_topo_context_lock );
if ( g_topo_context )
@@ -226,7 +229,8 @@
slurm_topo_context_destroy( g_topo_context );
g_topo_context = NULL;
retval = SLURM_ERROR;
- }
+ } else
+ init_run = true;
done:
slurm_mutex_unlock( &g_topo_context_lock );
@@ -245,6 +249,7 @@
if (!g_topo_context)
return SLURM_SUCCESS;
+ init_run = false;
rc = slurm_topo_context_destroy(g_topo_context);
g_topo_context = NULL;
return rc;
diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c
index 89a0d99..074cd21 100644
--- a/src/common/slurmdb_defs.c
+++ b/src/common/slurmdb_defs.c
@@ -281,7 +281,8 @@
i = len - cluster_rec->dimensions;
if (nodes[len-1] == ']')
i--;
- if (i > cluster_rec->dimensions) {
+
+ if (i > 0) {
char *p = '\0';
number = xstrntol(nodes + i, &p,
cluster_rec->dimensions, 36);
@@ -313,6 +314,9 @@
if (slurm_strcasestr(flags, "PartitionTimeLimit"))
return QOS_FLAG_PART_TIME_LIMIT;
+ if (slurm_strcasestr(flags, "RequiresReservation"))
+ return QOS_FLAG_REQ_RESV;
+
if (slurm_strcasestr(flags, "NoReserve"))
return QOS_FLAG_NO_RESERVE;
@@ -651,6 +655,8 @@
list_destroy(slurmdb_association->grp_cpus_list);
if(slurmdb_association->grp_jobs_list)
list_destroy(slurmdb_association->grp_jobs_list);
+ if(slurmdb_association->grp_mem_list)
+ list_destroy(slurmdb_association->grp_mem_list);
if(slurmdb_association->grp_nodes_list)
list_destroy(slurmdb_association->grp_nodes_list);
if(slurmdb_association->grp_submit_jobs_list)
@@ -725,6 +731,8 @@
list_destroy(job_cond->cluster_list);
if(job_cond->groupid_list)
list_destroy(job_cond->groupid_list);
+ if(job_cond->jobname_list)
+ list_destroy(job_cond->jobname_list);
if(job_cond->partition_list)
list_destroy(job_cond->partition_list);
if(job_cond->qos_list)
@@ -1028,6 +1036,7 @@
assoc->grp_cpu_run_mins = (uint64_t)NO_VAL;
assoc->grp_cpus = NO_VAL;
assoc->grp_jobs = NO_VAL;
+ assoc->grp_mem = NO_VAL;
assoc->grp_nodes = NO_VAL;
assoc->grp_submit_jobs = NO_VAL;
assoc->grp_wall = NO_VAL;
@@ -1083,6 +1092,7 @@
qos->grp_cpu_run_mins = (uint64_t)NO_VAL;
qos->grp_cpus = NO_VAL;
qos->grp_jobs = NO_VAL;
+ qos->grp_mem = NO_VAL;
qos->grp_nodes = NO_VAL;
qos->grp_submit_jobs = NO_VAL;
qos->grp_wall = NO_VAL;
@@ -1201,6 +1211,8 @@
xstrcat(qos_flags, "PartitionMinNodes,");
if (flags & QOS_FLAG_PART_TIME_LIMIT)
xstrcat(qos_flags, "PartitionTimeLimit,");
+ if (flags & QOS_FLAG_REQ_RESV)
+ xstrcat(qos_flags, "RequiresReservation,");
if (qos_flags)
qos_flags[strlen(qos_flags)-1] = '\0';
@@ -1715,6 +1727,11 @@
else if(assoc_ptr->grp_jobs != NO_VAL)
debug2(" GrpJobs : %u", assoc_ptr->grp_jobs);
+ if(assoc_ptr->grp_mem == INFINITE)
+ debug2(" GrpMemory : NONE");
+ else if(assoc_ptr->grp_mem != NO_VAL)
+ debug2(" GrpMemory : %u", assoc_ptr->grp_mem);
+
if(assoc_ptr->grp_nodes == INFINITE)
debug2(" GrpNodes : NONE");
else if(assoc_ptr->grp_nodes != NO_VAL)
diff --git a/src/common/slurmdb_pack.c b/src/common/slurmdb_pack.c
index b6cfd91..4308b9c 100644
--- a/src/common/slurmdb_pack.c
+++ b/src/common/slurmdb_pack.c
@@ -883,7 +883,120 @@
char *tmp_info = NULL;
slurmdb_association_rec_t *object = (slurmdb_association_rec_t *)in;
- if (rpc_version >= 8) {
+ if (rpc_version >= 10) {
+ if(!object) {
+ pack32(NO_VAL, buffer);
+ packnull(buffer);
+ packnull(buffer);
+
+ pack32(NO_VAL, buffer);
+
+ pack32(NO_VAL, buffer);
+
+ pack64(NO_VAL, buffer);
+ pack64(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+
+ pack32(0, buffer);
+ pack16(0, buffer);
+ pack32(0, buffer);
+
+ pack64(NO_VAL, buffer);
+ pack64(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+
+ packnull(buffer);
+ pack32(0, buffer);
+ packnull(buffer);
+
+ pack32(NO_VAL, buffer);
+
+ pack32(0, buffer);
+ pack32(0, buffer);
+
+ packnull(buffer);
+ return;
+ }
+
+ if(object->accounting_list)
+ count = list_count(object->accounting_list);
+
+ pack32(count, buffer);
+
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->accounting_list);
+ while((slurmdb_info = list_next(itr))) {
+ slurmdb_pack_accounting_rec(slurmdb_info,
+ rpc_version,
+ buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ packstr(object->acct, buffer);
+ packstr(object->cluster, buffer);
+
+ pack32(object->def_qos_id, buffer);
+
+ /* this used to be named fairshare to not have to redo
+ the order of things just to be in alpha order we
+ just renamed it and called it good */
+ pack32(object->shares_raw, buffer);
+
+ pack64(object->grp_cpu_mins, buffer);
+ pack64(object->grp_cpu_run_mins, buffer);
+ pack32(object->grp_cpus, buffer);
+ pack32(object->grp_jobs, buffer);
+ pack32(object->grp_mem, buffer);
+ pack32(object->grp_nodes, buffer);
+ pack32(object->grp_submit_jobs, buffer);
+ pack32(object->grp_wall, buffer);
+
+ pack32(object->id, buffer);
+ pack16(object->is_def, buffer);
+ pack32(object->lft, buffer);
+
+ pack64(object->max_cpu_mins_pj, buffer);
+ pack64(object->max_cpu_run_mins, buffer);
+ pack32(object->max_cpus_pj, buffer);
+ pack32(object->max_jobs, buffer);
+ pack32(object->max_nodes_pj, buffer);
+ pack32(object->max_submit_jobs, buffer);
+ pack32(object->max_wall_pj, buffer);
+
+ packstr(object->parent_acct, buffer);
+ pack32(object->parent_id, buffer);
+ packstr(object->partition, buffer);
+
+ if(object->qos_list)
+ count = list_count(object->qos_list);
+
+ pack32(count, buffer);
+
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->qos_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ pack32(object->rgt, buffer);
+ pack32(object->uid, buffer);
+
+ packstr(object->user, buffer);
+ } else if (rpc_version >= 8) {
if(!object) {
pack32(NO_VAL, buffer);
packnull(buffer);
@@ -1113,7 +1226,74 @@
slurmdb_init_association_rec(object_ptr, 0);
- if (rpc_version >= 8) {
+ if (rpc_version >= 10) {
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->accounting_list =
+ list_create(slurmdb_destroy_accounting_rec);
+ for(i=0; i<count; i++) {
+ if(slurmdb_unpack_accounting_rec(
+ (void **)&slurmdb_info,
+ rpc_version,
+ buffer) == SLURM_ERROR)
+ goto unpack_error;
+ list_append(object_ptr->accounting_list,
+ slurmdb_info);
+ }
+ }
+
+ safe_unpackstr_xmalloc(&object_ptr->acct, &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&object_ptr->cluster, &uint32_tmp,
+ buffer);
+
+ safe_unpack32(&object_ptr->def_qos_id, buffer);
+
+ safe_unpack32(&object_ptr->shares_raw, buffer);
+
+ safe_unpack64(&object_ptr->grp_cpu_mins, buffer);
+ safe_unpack64(&object_ptr->grp_cpu_run_mins, buffer);
+ safe_unpack32(&object_ptr->grp_cpus, buffer);
+ safe_unpack32(&object_ptr->grp_jobs, buffer);
+ safe_unpack32(&object_ptr->grp_mem, buffer);
+ safe_unpack32(&object_ptr->grp_nodes, buffer);
+ safe_unpack32(&object_ptr->grp_submit_jobs, buffer);
+ safe_unpack32(&object_ptr->grp_wall, buffer);
+
+ safe_unpack32(&object_ptr->id, buffer);
+ safe_unpack16(&object_ptr->is_def, buffer);
+ safe_unpack32(&object_ptr->lft, buffer);
+
+ safe_unpack64(&object_ptr->max_cpu_mins_pj, buffer);
+ safe_unpack64(&object_ptr->max_cpu_run_mins, buffer);
+ safe_unpack32(&object_ptr->max_cpus_pj, buffer);
+ safe_unpack32(&object_ptr->max_jobs, buffer);
+ safe_unpack32(&object_ptr->max_nodes_pj, buffer);
+ safe_unpack32(&object_ptr->max_submit_jobs, buffer);
+ safe_unpack32(&object_ptr->max_wall_pj, buffer);
+
+ safe_unpackstr_xmalloc(&object_ptr->parent_acct, &uint32_tmp,
+ buffer);
+ safe_unpack32(&object_ptr->parent_id, buffer);
+ safe_unpackstr_xmalloc(&object_ptr->partition, &uint32_tmp,
+ buffer);
+
+ safe_unpack32(&count, buffer);
+ /* This needs to look for zero to tell if something
+ has changed */
+ if(count != NO_VAL) {
+ object_ptr->qos_list = list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->qos_list, tmp_info);
+ }
+ }
+
+ safe_unpack32(&object_ptr->rgt, buffer);
+ safe_unpack32(&object_ptr->uid, buffer);
+
+ safe_unpackstr_xmalloc(&object_ptr->user, &uint32_tmp, buffer);
+ } else if (rpc_version >= 8) {
safe_unpack32(&count, buffer);
if(count != NO_VAL) {
object_ptr->accounting_list =
@@ -1314,7 +1494,94 @@
uint32_t count = NO_VAL;
char *tmp_info = NULL;
- if (rpc_version >= 9) {
+ if (rpc_version >= 10) {
+ if (!object) {
+ packnull(buffer);
+ pack32(0, buffer);
+
+ pack32(QOS_FLAG_NOTSET, buffer);
+
+ pack32(NO_VAL, buffer);
+ pack64(NO_VAL, buffer);
+ pack64(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+
+ pack64(NO_VAL, buffer);
+ pack64(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+
+ packnull(buffer);
+
+ pack_bit_str(NULL, buffer);
+ pack32(NO_VAL, buffer);
+
+ pack16(0, buffer);
+ pack32(0, buffer);
+
+ packdouble((double)NO_VAL, buffer);
+ packdouble((double)NO_VAL, buffer);
+ return;
+ }
+ packstr(object->description, buffer);
+ pack32(object->id, buffer);
+
+ pack32(object->flags, buffer);
+
+ pack32(object->grace_time, buffer);
+ pack64(object->grp_cpu_mins, buffer);
+ pack64(object->grp_cpu_run_mins, buffer);
+ pack32(object->grp_cpus, buffer);
+ pack32(object->grp_jobs, buffer);
+ pack32(object->grp_mem, buffer);
+ pack32(object->grp_nodes, buffer);
+ pack32(object->grp_submit_jobs, buffer);
+ pack32(object->grp_wall, buffer);
+
+ pack64(object->max_cpu_mins_pj, buffer);
+ pack64(object->max_cpu_run_mins_pu, buffer);
+ pack32(object->max_cpus_pj, buffer);
+ pack32(object->max_cpus_pu, buffer);
+ pack32(object->max_jobs_pu, buffer);
+ pack32(object->max_nodes_pj, buffer);
+ pack32(object->max_nodes_pu, buffer);
+ pack32(object->max_submit_jobs_pu, buffer);
+ pack32(object->max_wall_pj, buffer);
+
+ packstr(object->name, buffer);
+
+ pack_bit_str(object->preempt_bitstr, buffer);
+
+ if (object->preempt_list)
+ count = list_count(object->preempt_list);
+
+ pack32(count, buffer);
+
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->preempt_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ pack16(object->preempt_mode, buffer);
+ pack32(object->priority, buffer);
+
+ packdouble(object->usage_factor, buffer);
+ packdouble(object->usage_thres, buffer);
+ } else if (rpc_version >= 9) {
if (!object) {
packnull(buffer);
pack32(0, buffer);
@@ -1561,7 +1828,55 @@
slurmdb_init_qos_rec(object_ptr, 0);
- if (rpc_version >= 9) {
+ if (rpc_version >= 10) {
+ safe_unpackstr_xmalloc(&object_ptr->description,
+ &uint32_tmp, buffer);
+ safe_unpack32(&object_ptr->id, buffer);
+
+ safe_unpack32(&object_ptr->flags, buffer);
+
+ safe_unpack32(&object_ptr->grace_time, buffer);
+ safe_unpack64(&object_ptr->grp_cpu_mins, buffer);
+ safe_unpack64(&object_ptr->grp_cpu_run_mins, buffer);
+ safe_unpack32(&object_ptr->grp_cpus, buffer);
+ safe_unpack32(&object_ptr->grp_jobs, buffer);
+ safe_unpack32(&object_ptr->grp_mem, buffer);
+ safe_unpack32(&object_ptr->grp_nodes, buffer);
+ safe_unpack32(&object_ptr->grp_submit_jobs, buffer);
+ safe_unpack32(&object_ptr->grp_wall, buffer);
+
+ safe_unpack64(&object_ptr->max_cpu_mins_pj, buffer);
+ safe_unpack64(&object_ptr->max_cpu_run_mins_pu, buffer);
+ safe_unpack32(&object_ptr->max_cpus_pj, buffer);
+ safe_unpack32(&object_ptr->max_cpus_pu, buffer);
+ safe_unpack32(&object_ptr->max_jobs_pu, buffer);
+ safe_unpack32(&object_ptr->max_nodes_pj, buffer);
+ safe_unpack32(&object_ptr->max_nodes_pu, buffer);
+ safe_unpack32(&object_ptr->max_submit_jobs_pu, buffer);
+ safe_unpack32(&object_ptr->max_wall_pj, buffer);
+
+ safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer);
+
+ unpack_bit_str(&object_ptr->preempt_bitstr, buffer);
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->preempt_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->preempt_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack16(&object_ptr->preempt_mode, buffer);
+ safe_unpack32(&object_ptr->priority, buffer);
+
+ safe_unpackdouble(&object_ptr->usage_factor, buffer);
+ safe_unpackdouble(&object_ptr->usage_thres, buffer);
+ } else if (rpc_version >= 9) {
safe_unpackstr_xmalloc(&object_ptr->description,
&uint32_tmp, buffer);
safe_unpack32(&object_ptr->id, buffer);
@@ -2472,7 +2787,380 @@
ListIterator itr = NULL;
slurmdb_association_cond_t *object = (slurmdb_association_cond_t *)in;
- if(rpc_version >= 8) {
+ if(rpc_version >= 10) {
+ if(!object) {
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+
+ pack32(NO_VAL, buffer);
+
+ pack32(NO_VAL, buffer);
+
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+
+ pack32(NO_VAL, buffer);
+
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+
+ pack16(0, buffer);
+
+ pack32(NO_VAL, buffer);
+ pack32(NO_VAL, buffer);
+
+ pack32(NO_VAL, buffer);
+
+ pack_time(0, buffer);
+ pack_time(0, buffer);
+
+ pack32(NO_VAL, buffer);
+
+ pack16(0, buffer);
+ pack16(0, buffer);
+ pack16(0, buffer);
+ pack16(0, buffer);
+ pack16(0, buffer);
+ pack16(0, buffer);
+ return;
+ }
+
+ if(object->acct_list)
+ count = list_count(object->acct_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->acct_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->cluster_list)
+ count = list_count(object->cluster_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->cluster_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->def_qos_id_list)
+ count = list_count(object->def_qos_id_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->def_qos_id_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->fairshare_list)
+ count = list_count(object->fairshare_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->fairshare_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->grp_cpu_mins_list)
+ count = list_count(object->grp_cpu_mins_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->grp_cpu_mins_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->grp_cpu_run_mins_list)
+ count = list_count(object->grp_cpu_run_mins_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(
+ object->grp_cpu_run_mins_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->grp_cpus_list)
+ count = list_count(object->grp_cpus_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->grp_cpus_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->grp_jobs_list)
+ count = list_count(object->grp_jobs_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->grp_jobs_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->grp_mem_list)
+ count = list_count(object->grp_mem_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->grp_mem_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->grp_nodes_list)
+ count = list_count(object->grp_nodes_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->grp_nodes_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->grp_submit_jobs_list)
+ count = list_count(object->grp_submit_jobs_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(
+ object->grp_submit_jobs_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->grp_wall_list)
+ count = list_count(object->grp_wall_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->grp_wall_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->id_list)
+ count = list_count(object->id_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->id_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ }
+ count = NO_VAL;
+
+ if(object->max_cpu_mins_pj_list)
+ count = list_count(object->max_cpu_mins_pj_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(
+ object->max_cpu_mins_pj_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->max_cpu_run_mins_list)
+ count = list_count(object->max_cpu_run_mins_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(
+ object->max_cpu_run_mins_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->max_cpus_pj_list)
+ count = list_count(object->max_cpus_pj_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->max_cpus_pj_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+ if(object->max_jobs_list)
+ count = list_count(object->max_jobs_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->max_jobs_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+ if(object->max_nodes_pj_list)
+ count = list_count(object->max_nodes_pj_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->max_nodes_pj_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+ if(object->max_submit_jobs_list)
+ count = list_count(object->max_submit_jobs_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(
+ object->max_submit_jobs_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+ if(object->max_wall_pj_list)
+ count = list_count(object->max_wall_pj_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->max_wall_pj_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ pack16(object->only_defs, buffer);
+
+ if(object->partition_list)
+ count = list_count(object->partition_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->partition_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->parent_acct_list)
+ count = list_count(object->parent_acct_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->parent_acct_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ if(object->qos_list)
+ count = list_count(object->qos_list);
+
+ pack32(count, buffer);
+
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->qos_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ pack_time(object->usage_end, buffer);
+ pack_time(object->usage_start, buffer);
+
+ if(object->user_list)
+ count = list_count(object->user_list);
+
+ pack32(count, buffer);
+ if(count && count != NO_VAL) {
+ itr = list_iterator_create(object->user_list);
+ while((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+ count = NO_VAL;
+
+ pack16(object->with_usage, buffer);
+ pack16(object->with_deleted, buffer);
+ pack16(object->with_raw_qos, buffer);
+ pack16(object->with_sub_accts, buffer);
+ pack16(object->without_parent_info, buffer);
+ pack16(object->without_parent_limits, buffer);
+ } else if(rpc_version >= 8) {
if(!object) {
pack32(NO_VAL, buffer);
pack32(NO_VAL, buffer);
@@ -3154,7 +3842,287 @@
char *tmp_info = NULL;
*object = object_ptr;
- if(rpc_version >= 8) {
+ if (rpc_version >= 10) {
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->acct_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->acct_list, tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->cluster_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->cluster_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->def_qos_id_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->def_qos_id_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->fairshare_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->fairshare_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->grp_cpu_mins_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->grp_cpu_mins_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->grp_cpu_run_mins_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->grp_cpu_run_mins_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->grp_cpus_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->grp_cpus_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->grp_jobs_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->grp_jobs_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->grp_mem_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->grp_mem_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->grp_nodes_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->grp_nodes_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->grp_submit_jobs_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->grp_submit_jobs_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->grp_wall_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->grp_wall_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->id_list = list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->id_list, tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->max_cpu_mins_pj_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->max_cpu_mins_pj_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->max_cpu_run_mins_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->max_cpu_run_mins_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->max_cpus_pj_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->max_cpus_pj_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->max_jobs_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->max_jobs_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->max_nodes_pj_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->max_nodes_pj_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->max_submit_jobs_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->max_submit_jobs_list,
+ tmp_info);
+ }
+ }
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->max_wall_pj_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->max_wall_pj_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack16(&object_ptr->only_defs, buffer);
+
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->partition_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->partition_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->parent_acct_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->parent_acct_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->qos_list = list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->qos_list, tmp_info);
+ }
+ }
+
+ safe_unpack_time(&object_ptr->usage_end, buffer);
+ safe_unpack_time(&object_ptr->usage_start, buffer);
+
+ safe_unpack32(&count, buffer);
+ if(count != NO_VAL) {
+ object_ptr->user_list =
+ list_create(slurm_destroy_char);
+ for(i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->user_list, tmp_info);
+ }
+ }
+
+ safe_unpack16(&object_ptr->with_usage, buffer);
+ safe_unpack16(&object_ptr->with_deleted, buffer);
+ safe_unpack16(&object_ptr->with_raw_qos, buffer);
+ safe_unpack16(&object_ptr->with_sub_accts, buffer);
+ safe_unpack16(&object_ptr->without_parent_info, buffer);
+ safe_unpack16(&object_ptr->without_parent_limits, buffer);
+ } else if (rpc_version >= 8) {
safe_unpack32(&count, buffer);
if(count != NO_VAL) {
object_ptr->acct_list =
@@ -3846,7 +4814,224 @@
ListIterator itr = NULL;
slurmdb_job_cond_t *object = (slurmdb_job_cond_t *)in;
- if(rpc_version >= 8) {
+ if (rpc_version >= 10) {
+ if (!object) {
+ pack32(NO_VAL, buffer); /* count(acct_list) */
+ pack32(NO_VAL, buffer); /* count(associd_list) */
+ pack32(NO_VAL, buffer); /* count(cluster_list) */
+ pack32(0, buffer); /* cpus_max */
+ pack32(0, buffer); /* cpus_min */
+ pack16(0, buffer); /* duplicates */
+ pack32(0, buffer); /* exitcode */
+ pack32(NO_VAL, buffer); /* count(groupid_list) */
+ pack32(NO_VAL, buffer); /* count(jobname_list) */
+ pack32(0, buffer); /* nodes_max */
+ pack32(0, buffer); /* nodes_min */
+ pack32(NO_VAL, buffer); /* count(partition_list) */
+ pack32(NO_VAL, buffer); /* count(qos_list) */
+ pack32(NO_VAL, buffer); /* count(resv_list) */
+ pack32(NO_VAL, buffer); /* count(resvid_list) */
+ pack32(NO_VAL, buffer); /* count(step_list) */
+ pack32(NO_VAL, buffer); /* count(state_list) */
+ pack32(0, buffer); /* timelimit_max */
+ pack32(0, buffer); /* timelimit_min */
+ pack_time(0, buffer); /* usage_end */
+ pack_time(0, buffer); /* usage_start */
+ packnull(buffer); /* used_nodes */
+ pack32(NO_VAL, buffer); /* count(userid_list) */
+ pack32(NO_VAL, buffer); /* count(wckey_list) */
+ pack16(0, buffer); /* without_steps */
+ pack16(0, buffer); /* without_usage_truncation */
+ return;
+ }
+
+ if (object->acct_list)
+ count = list_count(object->acct_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->acct_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ if (object->associd_list)
+ count = list_count(object->associd_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->associd_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ }
+
+ if (object->cluster_list)
+ count = list_count(object->cluster_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->cluster_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ pack32(object->cpus_max, buffer);
+ pack32(object->cpus_min, buffer);
+ pack16(object->duplicates, buffer);
+ pack32((uint32_t)object->exitcode, buffer);
+
+ if (object->groupid_list)
+ count = list_count(object->groupid_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->groupid_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ if (object->jobname_list)
+ count = list_count(object->jobname_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->jobname_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ pack32(object->nodes_max, buffer);
+ pack32(object->nodes_min, buffer);
+
+ if (object->partition_list)
+ count = list_count(object->partition_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->partition_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ if (object->qos_list)
+ count = list_count(object->qos_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->qos_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ if (object->resv_list)
+ count = list_count(object->resv_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->resv_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ if (object->resvid_list)
+ count = list_count(object->resvid_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->resvid_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ if (object->step_list)
+ count = list_count(object->step_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->step_list);
+ while ((job = list_next(itr))) {
+ slurmdb_pack_selected_step(job, rpc_version,
+ buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ if (object->state_list)
+ count = list_count(object->state_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->state_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ pack32(object->timelimit_max, buffer);
+ pack32(object->timelimit_min, buffer);
+ pack_time(object->usage_end, buffer);
+ pack_time(object->usage_start, buffer);
+
+ packstr(object->used_nodes, buffer);
+
+ if (object->userid_list)
+ count = list_count(object->userid_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->userid_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ if (object->wckey_list)
+ count = list_count(object->wckey_list);
+ else
+ count = NO_VAL;
+ pack32(count, buffer);
+ if (count && count != NO_VAL) {
+ itr = list_iterator_create(object->wckey_list);
+ while ((tmp_info = list_next(itr))) {
+ packstr(tmp_info, buffer);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ pack16(object->without_steps, buffer);
+ pack16(object->without_usage_truncation, buffer);
+ } else if (rpc_version >= 8) {
if(!object) {
pack32(NO_VAL, buffer);
pack32(NO_VAL, buffer);
@@ -4035,7 +5220,6 @@
if(object->wckey_list)
count = list_count(object->wckey_list);
-
pack32(count, buffer);
if(count && count != NO_VAL) {
itr = list_iterator_create(object->wckey_list);
@@ -4215,7 +5399,6 @@
if(object->wckey_list)
count = list_count(object->wckey_list);
-
pack32(count, buffer);
if(count && count != NO_VAL) {
itr = list_iterator_create(object->wckey_list);
@@ -4243,7 +5426,173 @@
*object = object_ptr;
- if(rpc_version >= 8) {
+ if (rpc_version >= 10) {
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->acct_list = list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->acct_list, tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->associd_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->associd_list, tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->cluster_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->cluster_list, tmp_info);
+ }
+ }
+
+ safe_unpack32(&object_ptr->cpus_max, buffer);
+ safe_unpack32(&object_ptr->cpus_min, buffer);
+ safe_unpack16(&object_ptr->duplicates, buffer);
+ safe_unpack32(&uint32_tmp, buffer);
+ object_ptr->exitcode = (int32_t)uint32_tmp;
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->groupid_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->groupid_list, tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->jobname_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->jobname_list, tmp_info);
+ }
+ }
+
+ safe_unpack32(&object_ptr->nodes_max, buffer);
+ safe_unpack32(&object_ptr->nodes_min, buffer);
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->partition_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info,
+ &uint32_tmp, buffer);
+ list_append(object_ptr->partition_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->qos_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info,
+ &uint32_tmp, buffer);
+ list_append(object_ptr->qos_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->resv_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info,
+ &uint32_tmp, buffer);
+ list_append(object_ptr->resv_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->resvid_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info,
+ &uint32_tmp, buffer);
+ list_append(object_ptr->resvid_list,
+ tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->step_list =
+ list_create(slurmdb_destroy_selected_step);
+ for (i=0; i<count; i++) {
+ slurmdb_unpack_selected_step(
+ &job, rpc_version, buffer);
+ list_append(object_ptr->step_list, job);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->state_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info,
+ &uint32_tmp, buffer);
+ list_append(object_ptr->state_list, tmp_info);
+ }
+ }
+
+ safe_unpack32(&object_ptr->timelimit_max, buffer);
+ safe_unpack32(&object_ptr->timelimit_min, buffer);
+ safe_unpack_time(&object_ptr->usage_end, buffer);
+ safe_unpack_time(&object_ptr->usage_start, buffer);
+
+ safe_unpackstr_xmalloc(&object_ptr->used_nodes,
+ &uint32_tmp, buffer);
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->userid_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->userid_list, tmp_info);
+ }
+ }
+
+ safe_unpack32(&count, buffer);
+ if (count != NO_VAL) {
+ object_ptr->wckey_list =
+ list_create(slurm_destroy_char);
+ for (i=0; i<count; i++) {
+ safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp,
+ buffer);
+ list_append(object_ptr->wckey_list, tmp_info);
+ }
+ }
+
+ safe_unpack16(&object_ptr->without_steps, buffer);
+ safe_unpack16(&object_ptr->without_usage_truncation, buffer);
+ } else if (rpc_version >= 8) {
safe_unpack32(&count, buffer);
if(count != NO_VAL) {
object_ptr->acct_list = list_create(slurm_destroy_char);
diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c
index 458c52e..93e52c6 100644
--- a/src/common/slurmdbd_defs.c
+++ b/src/common/slurmdbd_defs.c
@@ -350,6 +350,13 @@
Buf buffer;
int cnt, rc = SLURM_SUCCESS;
static time_t syslog_time = 0;
+ static int max_agent_queue = 0;
+
+ /* Whatever our max job count is times that by 2 or
+ * MAX_AGENT_QUEUE which ever is bigger */
+ if (!max_agent_queue)
+ max_agent_queue =
+ MAX(MAX_AGENT_QUEUE, slurmctld_conf.max_job_cnt * 2);
buffer = pack_slurmdbd_msg(req, rpc_version);
@@ -363,7 +370,7 @@
}
}
cnt = list_count(agent_list);
- if ((cnt >= (MAX_AGENT_QUEUE / 2)) &&
+ if ((cnt >= (max_agent_queue / 2)) &&
(difftime(time(NULL), syslog_time) > 120)) {
/* Record critical error every 120 seconds */
syslog_time = time(NULL);
@@ -372,9 +379,9 @@
if (callbacks_requested)
(callback.dbd_fail)();
}
- if (cnt == (MAX_AGENT_QUEUE - 1))
+ if (cnt == (max_agent_queue - 1))
cnt -= _purge_job_start_req();
- if (cnt < MAX_AGENT_QUEUE) {
+ if (cnt < max_agent_queue) {
if (list_enqueue(agent_list, buffer) == NULL)
fatal("list_enqueue: memory allocation failure");
} else {
@@ -620,7 +627,8 @@
buffer);
break;
case DBD_GET_CONFIG:
- /* No message to pack */
+ if (rpc_version >= 10)
+ packstr((char *)req->data, buffer);
break;
case DBD_GET_JOBS:
/* Defunct RPC */
@@ -2017,7 +2025,22 @@
info("slurmdbd: agent queue size %u", cnt);
/* Leave item on the queue until processing complete */
if (agent_list) {
- if(list_count(agent_list) > 1) {
+ int handle_agent_count = 1000;
+ if (cnt > handle_agent_count) {
+ int agent_count = 0;
+ ListIterator agent_itr =
+ list_iterator_create(agent_list);
+ list_msg.my_list = list_create(NULL);
+ while ((buffer = list_next(agent_itr))) {
+ list_enqueue(list_msg.my_list, buffer);
+ agent_count++;
+ if (agent_count > handle_agent_count)
+ break;
+ }
+ list_iterator_destroy(agent_itr);
+ buffer = pack_slurmdbd_msg(&list_req,
+ SLURMDBD_VERSION);
+ } else if (cnt > 1) {
list_msg.my_list = agent_list;
buffer = pack_slurmdbd_msg(&list_req,
SLURMDBD_VERSION);
@@ -2047,7 +2070,7 @@
break;
}
error("slurmdbd: Failure sending message: %d: %m", rc);
- } else if(list_msg.my_list) {
+ } else if (list_msg.my_list) {
rc = _handle_mult_rc_ret(SLURMDBD_VERSION,
read_timeout);
} else {
@@ -2074,9 +2097,11 @@
list_msg.my_list as NULL as that is the
sign we sent a mult_msg.
*/
- if(list_msg.my_list)
+ if (list_msg.my_list) {
+ if (list_msg.my_list != agent_list)
+ list_destroy(list_msg.my_list);
list_msg.my_list = NULL;
- else
+ } else
buffer = (Buf) list_dequeue(agent_list);
free_buf(buffer);
@@ -2086,6 +2111,8 @@
got a failure.
*/
if(list_msg.my_list) {
+ if (list_msg.my_list != agent_list)
+ list_destroy(list_msg.my_list);
list_msg.my_list = NULL;
free_buf(buffer);
}
@@ -2160,6 +2187,7 @@
free_buf(buffer);
continue;
}
+
rc = _save_dbd_rec(fd, buffer);
free_buf(buffer);
if (rc != SLURM_SUCCESS)
@@ -2393,7 +2421,8 @@
unpack16(&msg_type, buffer);
set_buf_offset(buffer, offset);
if ((msg_type == DBD_JOB_START) ||
- (msg_type == DBD_STEP_START)) {
+ (msg_type == DBD_STEP_START) ||
+ (msg_type == DBD_STEP_COMPLETE)) {
list_remove(iter);
purged++;
}
diff --git a/src/common/slurmdbd_defs.h b/src/common/slurmdbd_defs.h
index aa823ed..7f98325 100644
--- a/src/common/slurmdbd_defs.h
+++ b/src/common/slurmdbd_defs.h
@@ -76,7 +76,7 @@
* communicating with it (e.g. it will not accept messages with a
* version higher than SLURMDBD_VERSION).
*/
-#define SLURMDBD_VERSION 9 /* already changed for 2.3 */
+#define SLURMDBD_VERSION 10 /* already changed for 2.4 */
#define SLURMDBD_VERSION_MIN 7
/* SLURM DBD message types */
diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c
index 617a7f1..cdfe7e6 100644
--- a/src/common/stepd_api.c
+++ b/src/common/stepd_api.c
@@ -288,34 +288,34 @@
stepd_get_info(int fd)
{
int req = REQUEST_INFO;
- slurmstepd_info_t *info;
+ slurmstepd_info_t *step_info;
uint16_t protocol_version;
- info = xmalloc(sizeof(slurmstepd_info_t));
+ step_info = xmalloc(sizeof(slurmstepd_info_t));
safe_write(fd, &req, sizeof(int));
- safe_read(fd, &info->uid, sizeof(uid_t));
- safe_read(fd, &info->jobid, sizeof(uint32_t));
- safe_read(fd, &info->stepid, sizeof(uint32_t));
+ safe_read(fd, &step_info->uid, sizeof(uid_t));
+ safe_read(fd, &step_info->jobid, sizeof(uint32_t));
+ safe_read(fd, &step_info->stepid, sizeof(uint32_t));
safe_read(fd, &protocol_version, sizeof(uint16_t));
if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
- safe_read(fd, &info->nodeid, sizeof(uint32_t));
- safe_read(fd, &info->job_mem_limit, sizeof(uint32_t));
- safe_read(fd, &info->step_mem_limit, sizeof(uint32_t));
+ safe_read(fd, &step_info->nodeid, sizeof(uint32_t));
+ safe_read(fd, &step_info->job_mem_limit, sizeof(uint32_t));
+ safe_read(fd, &step_info->step_mem_limit, sizeof(uint32_t));
} else {
- info->nodeid = protocol_version << 16;
+ step_info->nodeid = protocol_version << 16;
safe_read(fd, &protocol_version, sizeof(uint16_t));
- info->nodeid |= protocol_version;
- safe_read(fd, &info->job_mem_limit, sizeof(uint32_t));
- info->step_mem_limit = info->job_mem_limit;
+ step_info->nodeid |= protocol_version;
+ safe_read(fd, &step_info->job_mem_limit, sizeof(uint32_t));
+ step_info->step_mem_limit = step_info->job_mem_limit;
verbose("Old version slurmstepd for step %u.%u",
- info->jobid, info->stepid);
+ step_info->jobid, step_info->stepid);
}
- return info;
+ return step_info;
rwfail:
- xfree(info);
+ xfree(step_info);
return NULL;
}
@@ -872,6 +872,9 @@
int
stepd_completion(int fd, step_complete_msg_t *sent)
{
+#if (SLURM_PROTOCOL_VERSION <= SLURM_2_4_PROTOCOL_VERSION)
+/* FIXME: Remove this code plus the read code from src/slurmd/slurmstepd/req.c
+ * in SLURM version 2.5 */
int req = REQUEST_STEP_COMPLETION;
int rc;
int errnum = 0;
@@ -891,6 +894,47 @@
return rc;
rwfail:
return -1;
+#else
+ int req = REQUEST_STEP_COMPLETION_V2;
+ int rc;
+ int errnum = 0;
+ Buf buffer;
+ int len = 0;
+ int version = SLURM_PROTOCOL_VERSION;
+
+ buffer = init_buf(0);
+
+ debug("Entering stepd_completion, range_first = %d, range_last = %d",
+ sent->range_first, sent->range_last);
+ safe_write(fd, &req, sizeof(int));
+ safe_write(fd, &version, sizeof(int));
+ safe_write(fd, &sent->range_first, sizeof(int));
+ safe_write(fd, &sent->range_last, sizeof(int));
+ safe_write(fd, &sent->step_rc, sizeof(int));
+ /*
+ * We must not use setinfo over a pipe with slurmstepd here
+ * Indeed, slurmd does a large use of getinfo over a pipe
+ * with slurmstepd and doing the reverse can result in a deadlock
+ * scenario with slurmstepd :
+ * slurmd(lockforread,write)/slurmstepd(write,lockforread)
+	 * Do pack/unpack instead to be sure of independence of
+ * slurmd and slurmstepd
+ */
+ jobacct_gather_g_pack(sent->jobacct, SLURM_PROTOCOL_VERSION, buffer);
+ len = get_buf_offset(buffer);
+ safe_write(fd, &len, sizeof(int));
+ safe_write(fd, get_buf_data(buffer), len);
+ free_buf(buffer);
+
+ /* Receive the return code and errno */
+ safe_read(fd, &rc, sizeof(int));
+ safe_read(fd, &errnum, sizeof(int));
+
+ errno = errnum;
+ return rc;
+rwfail:
+ return -1;
+#endif
}
/* Wait for a file descriptor to be readable (up to 300 seconds).
diff --git a/src/common/stepd_api.h b/src/common/stepd_api.h
index 62b2e28..2a1be08 100644
--- a/src/common/stepd_api.h
+++ b/src/common/stepd_api.h
@@ -69,11 +69,12 @@
REQUEST_STEP_SUSPEND,
REQUEST_STEP_RESUME,
REQUEST_STEP_TERMINATE,
- REQUEST_STEP_COMPLETION,
+ REQUEST_STEP_COMPLETION, /* See REQUEST_STEP_COMPLETION_V2 below */
REQUEST_STEP_TASK_INFO,
REQUEST_STEP_LIST_PIDS,
REQUEST_STEP_RECONFIGURE,
REQUEST_STEP_STAT,
+ REQUEST_STEP_COMPLETION_V2,
} step_msg_t;
typedef enum {
diff --git a/src/common/strnatcmp.c b/src/common/strnatcmp.c
new file mode 100644
index 0000000..c91d87f
--- /dev/null
+++ b/src/common/strnatcmp.c
@@ -0,0 +1,178 @@
+/* -*- mode: c; c-file-style: "k&r" -*-
+
+ strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
+ Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+/* partial change history:
+ *
+ * 2004-10-10 mbp: Lift out character type dependencies into macros.
+ *
+ * Eric Sosman pointed out that ctype functions take a parameter whose
+ * value must be that of an unsigned int, even on platforms that have
+ * negative chars in their default char type.
+ */
+
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+#include "strnatcmp.h"
+
+
+/* These are defined as macros to make it easier to adapt this code to
+ * different characters types or comparison functions. */
+static inline int
+nat_isdigit(nat_char a)
+{
+ return isdigit((unsigned char) a);
+}
+
+
+static inline int
+nat_isspace(nat_char a)
+{
+ return isspace((unsigned char) a);
+}
+
+
+static inline nat_char
+nat_toupper(nat_char a)
+{
+ return toupper((unsigned char) a);
+}
+
+
+
+static int
+compare_right(nat_char const *a, nat_char const *b)
+{
+ int bias = 0;
+
+ /* The longest run of digits wins. That aside, the greatest
+ value wins, but we can't know that it will until we've scanned
+ both numbers to know that they have the same magnitude, so we
+ remember it in BIAS. */
+ for (;; a++, b++) {
+ if (!nat_isdigit(*a) && !nat_isdigit(*b))
+ return bias;
+ else if (!nat_isdigit(*a))
+ return -1;
+ else if (!nat_isdigit(*b))
+ return +1;
+ else if (*a < *b) {
+ if (!bias)
+ bias = -1;
+ } else if (*a > *b) {
+ if (!bias)
+ bias = +1;
+ } else if (!*a && !*b)
+ return bias;
+ }
+
+ return 0;
+}
+
+
+static int
+compare_left(nat_char const *a, nat_char const *b)
+{
+ /* Compare two left-aligned numbers: the first to have a
+ different value wins. */
+ for (;; a++, b++) {
+ if (!nat_isdigit(*a) && !nat_isdigit(*b))
+ return 0;
+ else if (!nat_isdigit(*a))
+ return -1;
+ else if (!nat_isdigit(*b))
+ return +1;
+ else if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return +1;
+ }
+
+ return 0;
+}
+
+
+static int strnatcmp0(nat_char const *a, nat_char const *b, int fold_case)
+{
+ int ai, bi;
+ nat_char ca, cb;
+ int fractional, result;
+
+ assert(a && b);
+ ai = bi = 0;
+ while (1) {
+ ca = a[ai]; cb = b[bi];
+
+ /* skip over leading spaces or zeros */
+ while (nat_isspace(ca))
+ ca = a[++ai];
+
+ while (nat_isspace(cb))
+ cb = b[++bi];
+
+ /* process run of digits */
+ if (nat_isdigit(ca) && nat_isdigit(cb)) {
+ fractional = (ca == '0' || cb == '0');
+
+ if (fractional) {
+ if ((result = compare_left(a+ai, b+bi)) != 0)
+ return result;
+ } else {
+ if ((result = compare_right(a+ai, b+bi)) != 0)
+ return result;
+ }
+ }
+
+ if (!ca && !cb) {
+ /* The strings compare the same. Perhaps the caller
+ will want to call strcmp to break the tie. */
+ return 0;
+ }
+
+ if (fold_case) {
+ ca = nat_toupper(ca);
+ cb = nat_toupper(cb);
+ }
+
+ if (ca < cb)
+ return -1;
+ else if (ca > cb)
+ return +1;
+
+ ++ai; ++bi;
+ }
+}
+
+
+
+int strnatcmp(nat_char const *a, nat_char const *b) {
+ return strnatcmp0(a, b, 0);
+}
+
+
+/* Compare, recognizing numeric string and ignoring case. */
+int strnatcasecmp(nat_char const *a, nat_char const *b) {
+ return strnatcmp0(a, b, 1);
+}
diff --git a/src/common/strnatcmp.h b/src/common/strnatcmp.h
new file mode 100644
index 0000000..51a3c4e
--- /dev/null
+++ b/src/common/strnatcmp.h
@@ -0,0 +1,31 @@
+/* -*- mode: c; c-file-style: "k&r" -*-
+
+ strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
+ Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+/* CUSTOMIZATION SECTION
+ *
+ * You can change this typedef, but must then also change the inline
+ * functions in strnatcmp.c */
+typedef char nat_char;
+
+int strnatcmp(nat_char const *a, nat_char const *b);
+int strnatcasecmp(nat_char const *a, nat_char const *b);
diff --git a/src/common/switch.c b/src/common/switch.c
index bbfc9c9..2d4fb84 100644
--- a/src/common/switch.c
+++ b/src/common/switch.c
@@ -127,7 +127,7 @@
static slurm_switch_context_t *g_context = NULL;
static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER;
-
+static bool init_run = false;
static slurm_switch_context_t *
_slurm_switch_context_create(const char *switch_type)
@@ -288,6 +288,9 @@
int retval = SLURM_SUCCESS;
char *switch_type = NULL;
+ if ( init_run && g_context )
+ return retval;
+
slurm_mutex_lock( &context_lock );
if ( g_context )
@@ -306,7 +309,8 @@
_slurm_switch_context_destroy( g_context );
g_context = NULL;
retval = SLURM_ERROR;
- }
+ } else
+ init_run = true;
done:
slurm_mutex_unlock( &context_lock );
@@ -321,6 +325,7 @@
if (!g_context)
return SLURM_SUCCESS;
+ init_run = false;
rc = _slurm_switch_context_destroy(g_context);
return rc;
}
diff --git a/src/common/working_cluster.c b/src/common/working_cluster.c
index 12e3eed..56ea158 100644
--- a/src/common/working_cluster.c
+++ b/src/common/working_cluster.c
@@ -83,9 +83,8 @@
{
if (is_cray_system())
return 1; /* Cray uses 1-dimensional hostlists */
- else if (working_cluster_rec)
- return working_cluster_rec->dimensions;
- return SYSTEM_DIMENSIONS;
+
+ return slurmdb_setup_cluster_dims();
}
extern uint32_t slurmdb_setup_cluster_flags(void)
diff --git a/src/common/xcgroup.c b/src/common/xcgroup.c
index 2a03ec0..778da27 100644
--- a/src/common/xcgroup.c
+++ b/src/common/xcgroup.c
@@ -54,6 +54,7 @@
#include <string.h>
#include <strings.h>
#include <dirent.h>
+#include <sys/mount.h>
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
@@ -129,12 +130,14 @@
* returned values:
* - XCGROUP_ERROR
* - XCGROUP_SUCCESS
+ *
+ * If an error occurs, errno will be set.
*/
int xcgroup_ns_mount(xcgroup_ns_t* cgns)
{
int fstatus;
- char* mount_cmd_fmt;
- char mount_cmd[1024];
+ char* options;
+ char opt_combined[1024];
char* mnt_point;
char* p;
@@ -159,8 +162,8 @@
p = mnt_point;
while ((p = index(p+1, '/')) != NULL) {
*p = '\0';
- mkdir(mnt_point, 0755);
- if (errno != EEXIST) {
+ fstatus = mkdir(mnt_point, 0755);
+ if (fstatus && errno != EEXIST) {
debug("unable to create cgroup ns required "
"directory '%s'", mnt_point);
xfree(mnt_point);
@@ -182,21 +185,20 @@
umask(omask);
if (cgns->mnt_args == NULL ||
- strlen(cgns->mnt_args) == 0) {
- mount_cmd_fmt = "/bin/mount -o %s%s -t cgroup none %s";
+ strlen(cgns->mnt_args) == 0)
+ options = cgns->subsystems;
+ else {
+ if (snprintf(opt_combined, sizeof(opt_combined), "%s,%s",
+ cgns->subsystems, cgns->mnt_args)
+ >= sizeof(opt_combined)) {
+ debug2("unable to build cgroup options string");
+ return XCGROUP_ERROR;
+ }
+ options = opt_combined;
}
- else
- mount_cmd_fmt = "/bin/mount -o %s, %s -t cgroup none %s";
- if (snprintf(mount_cmd, 1024, mount_cmd_fmt, cgns->subsystems,
- cgns->mnt_args, cgns->mnt_point) >= 1024) {
- debug2("unable to build cgroup ns mount cmd line");
- return XCGROUP_ERROR;
- }
- else
- debug3("cgroup mount cmd line is '%s'", mount_cmd);
-
- if (system(mount_cmd))
+ if (mount("cgroup", cgns->mnt_point, "cgroup",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV, options))
return XCGROUP_ERROR;
else {
/* we then set the release_agent if necessary */
@@ -217,26 +219,14 @@
* returned values:
* - XCGROUP_ERROR
* - XCGROUP_SUCCESS
+ *
+ * If an error occurs, errno will be set.
*/
int xcgroup_ns_umount(xcgroup_ns_t* cgns)
{
- char* umount_cmd_fmt;
- char umount_cmd[1024];
-
- umount_cmd_fmt = "/bin/umount %s";
-
- if (snprintf(umount_cmd, 1024, umount_cmd_fmt,
- cgns->mnt_point) >= 1024) {
- debug2("unable to build cgroup ns umount cmd line");
+ if (umount(cgns->mnt_point))
return XCGROUP_ERROR;
- }
- else
- debug3("cgroup ns umount cmd line is '%s'", umount_cmd);
-
- if (system(umount_cmd))
- return XCGROUP_ERROR;
- else
- return XCGROUP_SUCCESS;
+ return XCGROUP_SUCCESS;
}
/*
@@ -510,43 +500,71 @@
return XCGROUP_SUCCESS;
}
+static int cgroup_procs_readable (xcgroup_t *cg)
+{
+ struct stat st;
+ char *path = NULL;
+ int rc = 0;
+
+ xstrfmtcat (path, "%s/%s", cg->path, "cgroup.procs");
+ if ((stat (path, &st) >= 0) && (st.st_mode & S_IRUSR))
+ rc = 1;
+ xfree (path);
+ return (rc);
+}
+
+static int cgroup_procs_writable (xcgroup_t *cg)
+{
+ struct stat st;
+ char *path = NULL;
+ int rc = 0;
+
+ xstrfmtcat (path, "%s/%s", cg->path, "cgroup.procs");
+ if ((stat (path, &st) >= 0) && (st.st_mode & S_IWUSR))
+ rc = 1;
+ xfree (path);
+ return (rc);
+}
+
+// This call is not intended to be used to move thread pids
int xcgroup_add_pids(xcgroup_t* cg, pid_t* pids, int npids)
{
int fstatus = XCGROUP_ERROR;
- char* cpath = cg->path;
- char file_path[PATH_MAX];
-
- if (snprintf(file_path, PATH_MAX, "%s/tasks",
- cpath) >= PATH_MAX) {
- debug2("unable to add pids to '%s' : %m", cpath);
- return fstatus;
- }
-
- fstatus = _file_write_uint32s(file_path, (uint32_t*)pids, npids);
+ char* path = NULL;
+
+ // If possible use cgroup.procs to add the processes atomically
+ if (cgroup_procs_writable (cg))
+ xstrfmtcat (path, "%s/%s", cg->path, "cgroup.procs");
+ else
+ xstrfmtcat (path, "%s/%s", cg->path, "tasks");
+
+ fstatus = _file_write_uint32s(path, (uint32_t*)pids, npids);
if (fstatus != XCGROUP_SUCCESS)
- debug2("unable to add pids to '%s'", cpath);
+ debug2("unable to add pids to '%s'", cg->path);
+
+ xfree(path);
return fstatus;
}
-int
-xcgroup_get_pids(xcgroup_t* cg, pid_t **pids, int *npids)
+// This call is not intended to be used to get thread pids
+int xcgroup_get_pids(xcgroup_t* cg, pid_t **pids, int *npids)
{
int fstatus = XCGROUP_ERROR;
- char* cpath = cg->path;
- char file_path[PATH_MAX];
-
+ char* path = NULL;
+
if (pids == NULL || npids == NULL)
return SLURM_ERROR;
-
- if (snprintf(file_path, PATH_MAX, "%s/tasks",
- cpath) >= PATH_MAX) {
- debug2("unable to get pids of '%s' : %m", cpath);
- return fstatus;
- }
-
- fstatus = _file_read_uint32s(file_path, (uint32_t**)pids, npids);
+
+ if (cgroup_procs_readable (cg))
+ xstrfmtcat (path, "%s/%s", cg->path, "cgroup.procs");
+ else
+ xstrfmtcat (path, "%s/%s", cg->path, "tasks");
+
+ fstatus = _file_read_uint32s(path, (uint32_t**)pids, npids);
if (fstatus != XCGROUP_SUCCESS)
- debug2("unable to get pids of '%s'", cpath);
+ debug2("unable to get pids of '%s'", cg->path);
+
+ xfree(path);
return fstatus;
}
@@ -773,19 +791,6 @@
return XCGROUP_SUCCESS;
}
-static int cgroup_procs_writable (xcgroup_t *cg)
-{
- struct stat st;
- char *path = NULL;
- int rc = 0;
-
- xstrfmtcat (path, "%s/%s", cg->path, "cgroup.procs");
- if ((stat (path, &st) >= 0) && (st.st_mode & S_IWUSR))
- rc = 1;
- xfree (path);
- return (rc);
-}
-
int xcgroup_move_process (xcgroup_t *cg, pid_t pid)
{
if (!cgroup_procs_writable (cg))
diff --git a/src/common/xcgroup.h b/src/common/xcgroup.h
index 4f03b5c..a5861f8 100644
--- a/src/common/xcgroup.h
+++ b/src/common/xcgroup.h
@@ -97,6 +97,8 @@
* returned values:
* - XCGROUP_ERROR
* - XCGROUP_SUCCESS
+ *
+ * If an error occurs, errno will be set.
*/
int xcgroup_ns_mount(xcgroup_ns_t* cgns);
@@ -106,6 +108,8 @@
* returned values:
* - XCGROUP_ERROR
* - XCGROUP_SUCCESS
+ *
+ * If an error occurs, errno will be set.
*/
int xcgroup_ns_umount(xcgroup_ns_t* cgns);
diff --git a/src/common/xcgroup_read_config.c b/src/common/xcgroup_read_config.c
index 48fdf7b..51368c7 100644
--- a/src/common/xcgroup_read_config.c
+++ b/src/common/xcgroup_read_config.c
@@ -34,6 +34,10 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
+#ifndef _ISOC99_SOURCE
+# define _ISOC99_SOURCE /* strtof() */
+#endif
+
#include <pwd.h>
#include <stdlib.h>
#include <string.h>
diff --git a/src/common/xcpuinfo.c b/src/common/xcpuinfo.c
index 19e7431..fc27ac9 100644
--- a/src/common/xcpuinfo.c
+++ b/src/common/xcpuinfo.c
@@ -671,7 +671,7 @@
/* xcpuinfo_mac_to_abs(mach,&abs); */
int
-xcpuinfo_init()
+xcpuinfo_init(void)
{
if ( initialized )
return XCPUINFO_SUCCESS;
@@ -689,7 +689,7 @@
}
int
-xcpuinfo_fini()
+xcpuinfo_fini(void)
{
if ( ! initialized )
return XCPUINFO_SUCCESS;
diff --git a/src/common/xcpuinfo.h b/src/common/xcpuinfo.h
index 1e8cdd5..0c93b84 100644
--- a/src/common/xcpuinfo.h
+++ b/src/common/xcpuinfo.h
@@ -57,7 +57,7 @@
* - XCPUINFO_ERROR
* - XCPUINFO_SUCCESS
*/
-int xcpuinfo_init();
+int xcpuinfo_init(void);
/*
* Destroy xcpuinfo internal data
@@ -66,7 +66,7 @@
* - XCPUINFO_ERROR
* - XCPUINFO_SUCCESS
*/
-int xcpuinfo_fini();
+int xcpuinfo_fini(void);
/*
* Use xcpuinfo internal data to convert an abstract range
diff --git a/src/database/Makefile.in b/src/database/Makefile.in
index 30f264a..fad7ef4 100644
--- a/src/database/Makefile.in
+++ b/src/database/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -213,6 +213,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -249,6 +250,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -396,9 +398,9 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libslurm_mysql.la: $(libslurm_mysql_la_OBJECTS) $(libslurm_mysql_la_DEPENDENCIES)
+libslurm_mysql.la: $(libslurm_mysql_la_OBJECTS) $(libslurm_mysql_la_DEPENDENCIES) $(EXTRA_libslurm_mysql_la_DEPENDENCIES)
$(libslurm_mysql_la_LINK) $(am_libslurm_mysql_la_rpath) $(libslurm_mysql_la_OBJECTS) $(libslurm_mysql_la_LIBADD) $(LIBS)
-libslurm_pgsql.la: $(libslurm_pgsql_la_OBJECTS) $(libslurm_pgsql_la_DEPENDENCIES)
+libslurm_pgsql.la: $(libslurm_pgsql_la_OBJECTS) $(libslurm_pgsql_la_DEPENDENCIES) $(EXTRA_libslurm_pgsql_la_DEPENDENCIES)
$(libslurm_pgsql_la_LINK) $(am_libslurm_pgsql_la_rpath) $(libslurm_pgsql_la_OBJECTS) $(libslurm_pgsql_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -547,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/db_api/Makefile.in b/src/db_api/Makefile.in
index e74aadf..2336647 100644
--- a/src/db_api/Makefile.in
+++ b/src/db_api/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -108,6 +108,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(libdir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
libslurmdb_la_DEPENDENCIES = \
@@ -223,6 +229,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -259,6 +266,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -487,7 +495,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libslurmdb.la: $(libslurmdb_la_OBJECTS) $(libslurmdb_la_DEPENDENCIES)
+libslurmdb.la: $(libslurmdb_la_OBJECTS) $(libslurmdb_la_DEPENDENCIES) $(EXTRA_libslurmdb_la_DEPENDENCIES)
$(libslurmdb_la_LINK) -rpath $(libdir) $(libslurmdb_la_OBJECTS) $(libslurmdb_la_LIBADD) $(LIBS)
clean-noinstPROGRAMS:
@@ -649,10 +657,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/db_api/extra_get_functions.c b/src/db_api/extra_get_functions.c
index c87166b..e63ecb5 100644
--- a/src/db_api/extra_get_functions.c
+++ b/src/db_api/extra_get_functions.c
@@ -55,7 +55,7 @@
*/
extern List slurmdb_config_get(void *db_conn)
{
- return acct_storage_g_get_config(db_conn);
+ return acct_storage_g_get_config(db_conn, "slurmdbd.conf");
}
/*
diff --git a/src/plugins/Makefile.in b/src/plugins/Makefile.in
index 7aa54ce..84cdbdb 100644
--- a/src/plugins/Makefile.in
+++ b/src/plugins/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -201,6 +201,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -237,6 +238,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -591,10 +593,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/accounting_storage/Makefile.am b/src/plugins/accounting_storage/Makefile.am
index 4c228e1..b787666 100644
--- a/src/plugins/accounting_storage/Makefile.am
+++ b/src/plugins/accounting_storage/Makefile.am
@@ -1,3 +1,3 @@
# Makefile for storage plugins
-SUBDIRS = filetxt mysql pgsql none slurmdbd
+SUBDIRS = common filetxt mysql pgsql none slurmdbd
diff --git a/src/plugins/accounting_storage/Makefile.in b/src/plugins/accounting_storage/Makefile.in
index 8a28f44..1217cc1 100644
--- a/src/plugins/accounting_storage/Makefile.in
+++ b/src/plugins/accounting_storage/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -326,7 +328,7 @@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-SUBDIRS = filetxt mysql pgsql none slurmdbd
+SUBDIRS = common filetxt mysql pgsql none slurmdbd
all: all-recursive
.SUFFIXES:
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/accounting_storage/common/Makefile.in b/src/plugins/accounting_storage/common/Makefile.in
index c08c409..8f2721b 100644
--- a/src/plugins/accounting_storage/common/Makefile.in
+++ b/src/plugins/accounting_storage/common/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -184,6 +184,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -220,6 +221,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -359,7 +361,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libaccounting_storage_common.la: $(libaccounting_storage_common_la_OBJECTS) $(libaccounting_storage_common_la_DEPENDENCIES)
+libaccounting_storage_common.la: $(libaccounting_storage_common_la_OBJECTS) $(libaccounting_storage_common_la_DEPENDENCIES) $(EXTRA_libaccounting_storage_common_la_DEPENDENCIES)
$(LINK) $(libaccounting_storage_common_la_OBJECTS) $(libaccounting_storage_common_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -493,10 +495,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/accounting_storage/common/common_as.c b/src/plugins/accounting_storage/common/common_as.c
index 21beb50..5d2245e 100644
--- a/src/plugins/accounting_storage/common/common_as.c
+++ b/src/plugins/accounting_storage/common/common_as.c
@@ -153,6 +153,8 @@
assoc->grp_cpus = INFINITE;
if(assoc->grp_jobs == NO_VAL)
assoc->grp_jobs = INFINITE;
+ if(assoc->grp_mem == NO_VAL)
+ assoc->grp_mem = INFINITE;
if(assoc->grp_nodes == NO_VAL)
assoc->grp_nodes = INFINITE;
if(assoc->grp_submit_jobs == NO_VAL)
@@ -192,6 +194,8 @@
qos->grp_cpus = INFINITE;
if(qos->grp_jobs == NO_VAL)
qos->grp_jobs = INFINITE;
+ if(qos->grp_mem == NO_VAL)
+ qos->grp_mem = INFINITE;
if(qos->grp_nodes == NO_VAL)
qos->grp_nodes = INFINITE;
if(qos->grp_submit_jobs == NO_VAL)
@@ -827,10 +831,10 @@
else { /* file shuffle */
(void) unlink(old_file);
if (link(reg_file, old_file))
- error("Link(%s, %s): %m", reg_file, old_file);
+ debug4("Link(%s, %s): %m", reg_file, old_file);
(void) unlink(reg_file);
if (link(new_file, reg_file))
- error("Link(%s, %s): %m", new_file, reg_file);
+ debug4("Link(%s, %s): %m", new_file, reg_file);
(void) unlink(new_file);
}
xfree(old_file);
diff --git a/src/plugins/accounting_storage/common/common_as.h b/src/plugins/accounting_storage/common/common_as.h
index 8da209d..b2f26bd 100644
--- a/src/plugins/accounting_storage/common/common_as.h
+++ b/src/plugins/accounting_storage/common/common_as.h
@@ -43,8 +43,6 @@
#include "src/common/assoc_mgr.h"
-#define THIS_FILE ((strrchr(__FILE__, '/') ?: __FILE__ - 1) + 1)
-
extern int addto_update_list(List update_list, slurmdb_update_type_t type,
void *object);
diff --git a/src/plugins/accounting_storage/filetxt/Makefile.in b/src/plugins/accounting_storage/filetxt/Makefile.in
index b93ff8d..3bb37d2 100644
--- a/src/plugins/accounting_storage/filetxt/Makefile.in
+++ b/src/plugins/accounting_storage/filetxt/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
accounting_storage_filetxt_la_LIBADD =
@@ -211,6 +217,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -247,6 +254,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -407,7 +415,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-accounting_storage_filetxt.la: $(accounting_storage_filetxt_la_OBJECTS) $(accounting_storage_filetxt_la_DEPENDENCIES)
+accounting_storage_filetxt.la: $(accounting_storage_filetxt_la_OBJECTS) $(accounting_storage_filetxt_la_DEPENDENCIES) $(EXTRA_accounting_storage_filetxt_la_DEPENDENCIES)
$(accounting_storage_filetxt_la_LINK) -rpath $(pkglibdir) $(accounting_storage_filetxt_la_OBJECTS) $(accounting_storage_filetxt_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -545,10 +553,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
index 4f368d2..83ac300 100644
--- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
+++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
@@ -449,7 +449,7 @@
return NULL;
}
-extern List acct_storage_p_get_config(void *db_conn)
+extern List acct_storage_p_get_config(void *db_conn, char *config_name)
{
return NULL;
}
diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c
index a8b9e2c..4a98567 100644
--- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c
+++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c
@@ -40,6 +40,8 @@
* This file is patterned after jobcomp_linux.c, written by Morris Jette and
* Copyright (C) 2002 The Regents of the University of California.
\*****************************************************************************/
+
+#include <sys/resource.h> /* for struct rusage */
#include <stdlib.h>
#include <ctype.h>
#include <sys/stat.h>
@@ -1130,6 +1132,20 @@
}
foundgid:
+ if (job_cond->jobname_list
+ && list_count(job_cond->jobname_list)) {
+ itr = list_iterator_create(job_cond->jobname_list);
+ while((object = list_next(itr))) {
+ if (!strcasecmp(f[F_JOBNAME], object)) {
+ list_iterator_destroy(itr);
+ goto foundjobname;
+ }
+ }
+ list_iterator_destroy(itr);
+ continue; /* no match */
+ }
+ foundjobname:
+
if (job_cond->step_list
&& list_count(job_cond->step_list)) {
itr = list_iterator_create(job_cond->step_list);
diff --git a/src/plugins/accounting_storage/mysql/Makefile.am b/src/plugins/accounting_storage/mysql/Makefile.am
index 1adf702..a884637 100644
--- a/src/plugins/accounting_storage/mysql/Makefile.am
+++ b/src/plugins/accounting_storage/mysql/Makefile.am
@@ -8,8 +8,8 @@
AS_MYSQL_SOURCES = accounting_storage_mysql.c accounting_storage_mysql.h \
as_mysql_acct.c as_mysql_acct.h \
- as_mysql_archive.c as_mysql_arvhive.h \
- as_mysql_assoc.c as_mysql_accoc.h \
+ as_mysql_archive.c as_mysql_archive.h \
+ as_mysql_assoc.c as_mysql_assoc.h \
as_mysql_cluster.c as_mysql_cluster.h \
as_mysql_convert.c as_mysql_convert.h \
as_mysql_job.c as_mysql_job.h \
diff --git a/src/plugins/accounting_storage/mysql/Makefile.in b/src/plugins/accounting_storage/mysql/Makefile.in
index 4d6e9b9..4d08c2c 100644
--- a/src/plugins/accounting_storage/mysql/Makefile.in
+++ b/src/plugins/accounting_storage/mysql/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -114,7 +120,7 @@
am__accounting_storage_mysql_la_SOURCES_DIST = \
accounting_storage_mysql.c accounting_storage_mysql.h \
as_mysql_acct.c as_mysql_acct.h as_mysql_archive.c \
- as_mysql_arvhive.h as_mysql_assoc.c as_mysql_accoc.h \
+ as_mysql_archive.h as_mysql_assoc.c as_mysql_assoc.h \
as_mysql_cluster.c as_mysql_cluster.h as_mysql_convert.c \
as_mysql_convert.h as_mysql_job.c as_mysql_job.h \
as_mysql_jobacct_process.c as_mysql_jobacct_process.h \
@@ -146,7 +152,7 @@
am__EXTRA_accounting_storage_mysql_la_SOURCES_DIST = \
accounting_storage_mysql.c accounting_storage_mysql.h \
as_mysql_acct.c as_mysql_acct.h as_mysql_archive.c \
- as_mysql_arvhive.h as_mysql_assoc.c as_mysql_accoc.h \
+ as_mysql_archive.h as_mysql_assoc.c as_mysql_assoc.h \
as_mysql_cluster.c as_mysql_cluster.h as_mysql_convert.c \
as_mysql_convert.h as_mysql_job.c as_mysql_job.h \
as_mysql_jobacct_process.c as_mysql_jobacct_process.h \
@@ -261,6 +267,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -297,6 +304,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -389,8 +397,8 @@
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
AS_MYSQL_SOURCES = accounting_storage_mysql.c accounting_storage_mysql.h \
as_mysql_acct.c as_mysql_acct.h \
- as_mysql_archive.c as_mysql_arvhive.h \
- as_mysql_assoc.c as_mysql_accoc.h \
+ as_mysql_archive.c as_mysql_archive.h \
+ as_mysql_assoc.c as_mysql_assoc.h \
as_mysql_cluster.c as_mysql_cluster.h \
as_mysql_convert.c as_mysql_convert.h \
as_mysql_job.c as_mysql_job.h \
@@ -480,7 +488,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-accounting_storage_mysql.la: $(accounting_storage_mysql_la_OBJECTS) $(accounting_storage_mysql_la_DEPENDENCIES)
+accounting_storage_mysql.la: $(accounting_storage_mysql_la_OBJECTS) $(accounting_storage_mysql_la_DEPENDENCIES) $(EXTRA_accounting_storage_mysql_la_DEPENDENCIES)
$(accounting_storage_mysql_la_LINK) $(am_accounting_storage_mysql_la_rpath) $(accounting_storage_mysql_la_OBJECTS) $(accounting_storage_mysql_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -744,10 +752,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
index 9789801..db69309 100644
--- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
+++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
@@ -260,9 +260,8 @@
}
/* this function is here to see if any of what we are trying to remove
- * has jobs that are or were once running. So if we have jobs and the
- * object is less than a day old we don't want to delete it only set
- * the deleted flag.
+ * has jobs that are not completed. If we have jobs and the object is less
+ * than a day old we don't want to delete it, only set the deleted flag.
*/
static bool _check_jobs_before_remove(mysql_conn_t *mysql_conn,
char *cluster_name,
@@ -295,11 +294,11 @@
"where t1.lft between "
"t2.lft and t2.rgt && (%s) "
"and t0.id_assoc=t1.id_assoc "
- "and t0.time_end=0 && t0.state=%d;",
+ "and t0.time_end=0 && t0.state<%d;",
object, cluster_name, job_table,
cluster_name, assoc_table,
cluster_name, assoc_table,
- assoc_char, JOB_RUNNING);
+ assoc_char, JOB_COMPLETE);
xfree(object);
} else {
query = xstrdup_printf(
@@ -368,10 +367,10 @@
query = xstrdup_printf("select %s "
"from \"%s_%s\" as t1, \"%s_%s\" as t2 "
"where (%s) and t1.id_assoc=t2.id_assoc "
- "and t1.time_end=0 && t1.state=%d;",
+ "and t1.time_end=0 && t1.state<%d;",
object, cluster_name, job_table,
cluster_name, assoc_table,
- assoc_char, JOB_RUNNING);
+ assoc_char, JOB_COMPLETE);
xfree(object);
} else {
query = xstrdup_printf(
@@ -498,13 +497,14 @@
{ "grp_jobs", "int default NULL" },
{ "grp_submit_jobs", "int default NULL" },
{ "grp_cpus", "int default NULL" },
+ { "grp_mem", "int default NULL" },
{ "grp_nodes", "int default NULL" },
{ "grp_wall", "int default NULL" },
{ "grp_cpu_mins", "bigint default NULL" },
{ "grp_cpu_run_mins", "bigint default NULL" },
{ "preempt", "text not null default ''" },
{ "preempt_mode", "int default 0" },
- { "priority", "int default 0" },
+ { "priority", "int unsigned default 0" },
{ "usage_factor", "double default 1.0 not null" },
{ "usage_thres", "double default NULL" },
{ NULL, NULL}
@@ -867,6 +867,7 @@
{ "grp_jobs", "int default NULL" },
{ "grp_submit_jobs", "int default NULL" },
{ "grp_cpus", "int default NULL" },
+ { "grp_mem", "int default NULL" },
{ "grp_nodes", "int default NULL" },
{ "grp_wall", "int default NULL" },
{ "grp_cpu_mins", "bigint default NULL" },
@@ -938,7 +939,7 @@
{ "nodes_alloc", "int unsigned not null" },
{ "node_inx", "text" },
{ "partition", "tinytext not null" },
- { "priority", "int not null" },
+ { "priority", "int unsigned not null" },
{ "state", "smallint unsigned not null" },
{ "timelimit", "int unsigned default 0 not null" },
{ "time_submit", "int unsigned default 0 not null" },
@@ -1351,6 +1352,8 @@
assoc->grp_cpus = INFINITE;
if (assoc->grp_jobs == NO_VAL)
assoc->grp_jobs = INFINITE;
+ if (assoc->grp_mem == NO_VAL)
+ assoc->grp_mem = INFINITE;
if (assoc->grp_nodes == NO_VAL)
assoc->grp_nodes = INFINITE;
if (assoc->grp_submit_jobs == NO_VAL)
@@ -1435,6 +1438,17 @@
xstrfmtcat(*extra, ", grp_jobs=%u", assoc->grp_jobs);
}
+ if (assoc->grp_mem == INFINITE) {
+ xstrcat(*cols, ", grp_mem");
+ xstrcat(*vals, ", NULL");
+ xstrcat(*extra, ", grp_mem=NULL");
+ } else if ((assoc->grp_mem != NO_VAL)
+ && ((int32_t)assoc->grp_mem >= 0)) {
+ xstrcat(*cols, ", grp_mem");
+ xstrfmtcat(*vals, ", %u", assoc->grp_mem);
+ xstrfmtcat(*extra, ", grp_mem=%u", assoc->grp_mem);
+ }
+
if (assoc->grp_nodes == INFINITE) {
xstrcat(*cols, ", grp_nodes");
xstrcat(*vals, ", NULL");
@@ -2500,7 +2514,7 @@
return ret_list;
}
-extern List acct_storage_p_get_config(void *db_conn)
+extern List acct_storage_p_get_config(void *db_conn, char *config_name)
{
return NULL;
}
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_acct.c b/src/plugins/accounting_storage/mysql/as_mysql_acct.c
index f4aae5c..21e82a4 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_acct.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_acct.c
@@ -675,6 +675,7 @@
if (acct_cond->assoc_cond->acct_list)
list_destroy(acct_cond->assoc_cond->acct_list);
acct_cond->assoc_cond->acct_list = list_create(NULL);
+ acct_cond->assoc_cond->with_deleted = acct_cond->with_deleted;
}
while ((row = mysql_fetch_row(result))) {
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c
index 64b531a..22156db 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c
@@ -53,6 +53,7 @@
"grp_cpu_run_mins",
"grp_cpus",
"grp_jobs",
+ "grp_mem",
"grp_nodes",
"grp_submit_jobs",
"grp_wall",
@@ -81,6 +82,7 @@
ASSOC_REQ_GCRM,
ASSOC_REQ_GC,
ASSOC_REQ_GJ,
+ ASSOC_REQ_GMEM,
ASSOC_REQ_GN,
ASSOC_REQ_GSJ,
ASSOC_REQ_GW,
@@ -1140,6 +1142,22 @@
xstrcat(*extra, ")");
}
+ if (assoc_cond->grp_mem_list
+ && list_count(assoc_cond->grp_mem_list)) {
+ set = 0;
+ xstrcat(*extra, " && (");
+ itr = list_iterator_create(assoc_cond->grp_mem_list);
+ while ((object = list_next(itr))) {
+ if (set)
+ xstrcat(*extra, " || ");
+ xstrfmtcat(*extra, "%s.grp_mem='%s'",
+ prefix, object);
+ set = 1;
+ }
+ list_iterator_destroy(itr);
+ xstrcat(*extra, ")");
+ }
+
if (assoc_cond->grp_nodes_list
&& list_count(assoc_cond->grp_nodes_list)) {
set = 0;
@@ -1590,6 +1608,7 @@
mod_assoc->grp_cpu_mins = assoc->grp_cpu_mins;
mod_assoc->grp_cpu_run_mins = assoc->grp_cpu_run_mins;
mod_assoc->grp_jobs = assoc->grp_jobs;
+ mod_assoc->grp_mem = assoc->grp_mem;
mod_assoc->grp_nodes = assoc->grp_nodes;
mod_assoc->grp_submit_jobs = assoc->grp_submit_jobs;
mod_assoc->grp_wall = assoc->grp_wall;
@@ -2056,6 +2075,11 @@
else
assoc->grp_cpus = INFINITE;
+ if (row[ASSOC_REQ_GMEM])
+ assoc->grp_mem = slurm_atoul(row[ASSOC_REQ_GMEM]);
+ else
+ assoc->grp_mem = INFINITE;
+
if (row[ASSOC_REQ_GN])
assoc->grp_nodes = slurm_atoul(row[ASSOC_REQ_GN]);
else
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c
index 2e03d6b..0aaae4d 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_job.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c
@@ -513,12 +513,13 @@
xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%u, "
"cpus_alloc=%u, nodes_alloc=%u, id_qos=%u, "
- "id_assoc=%u, id_wckey=%u, id_resv=%u, timelimit=%u "
+ "id_assoc=%u, id_wckey=%u, id_resv=%u, "
+ "timelimit=%u, time_eligible=%ld "
"where job_db_inx=%d",
start_time, jname, job_state,
job_ptr->total_cpus, node_cnt, job_ptr->qos_id,
job_ptr->assoc_id, wckeyid,
- job_ptr->resv_id, job_ptr->time_limit,
+ job_ptr->resv_id, job_ptr->time_limit, begin_time,
job_ptr->db_index);
debug3("%d(%s:%d) query\n%s",
mysql_conn->conn, THIS_FILE, __LINE__, query);
@@ -916,7 +917,6 @@
struct step_record *step_ptr)
{
time_t now;
- int elapsed;
int comp_status;
int cpus = 0;
struct jobacctinfo *jobacct = (struct jobacctinfo *)step_ptr->jobacct;
@@ -926,7 +926,7 @@
char *query = NULL;
int rc =SLURM_SUCCESS;
uint32_t exit_code = 0;
- time_t start_time, submit_time;
+ time_t submit_time;
if (!step_ptr->job_ptr->db_index
&& ((!step_ptr->job_ptr->details
@@ -937,14 +937,10 @@
return SLURM_ERROR;
}
- if (step_ptr->job_ptr->resize_time) {
- submit_time = start_time = step_ptr->job_ptr->resize_time;
- if (step_ptr->start_time > submit_time)
- start_time = step_ptr->start_time;
- } else {
- start_time = step_ptr->start_time;
+ if (step_ptr->job_ptr->resize_time)
+ submit_time = step_ptr->job_ptr->resize_time;
+ else
submit_time = step_ptr->job_ptr->details->submit_time;
- }
if (jobacct == NULL) {
/* JobAcctGather=slurmdb_gather/none, no data to process */
@@ -974,9 +970,6 @@
#endif
}
- if ((elapsed = (now - start_time)) < 0)
- elapsed = 0; /* For *very* short jobs, if clock is wrong */
-
exit_code = step_ptr->exit_code;
if (WIFSIGNALED(exit_code)) {
comp_status = JOB_CANCELLED;
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c
index f279b81..fad2bd9 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c
@@ -242,8 +242,7 @@
}
break;
case JOB_SUSPENDED:
- /* FIX ME: this should do something with the suspended
- table, but it doesn't right now. */
+ /* Handle this the same way we handle RUNNING. */
case JOB_RUNNING:
if (start) {
if (!end) {
@@ -252,7 +251,7 @@
"((!t1.time_end && t1.state=%d) || "
"(%d between t1.time_start "
"and t1.time_end)))",
- JOB_RUNNING, start);
+ base_state, start);
} else {
xstrfmtcat(*extra,
"(t1.time_start && "
@@ -845,7 +844,8 @@
/* get the dimensions of this cluster so we know how to deal
with the hostlists */
- query = xstrdup_printf("select dimensions from %s where name='%s'",
+ query = xstrdup_printf("select dimensions, flags from %s where "
+ "name='%s'",
cluster_table,
(char *)list_peek(job_cond->cluster_list));
@@ -862,7 +862,14 @@
(char *)list_peek(job_cond->cluster_list));
return NULL;
}
- dims = atoi(row[0]);
+
+ /* On a Cray System when dealing with hostlists as we are here
+ this always needs to be 1.
+ */
+ if (slurm_atoul(row[1]) & CLUSTER_FLAG_CRAYXT)
+ dims = 1;
+ else
+ dims = atoi(row[0]);
temp_hl = hostlist_create_dims(job_cond->used_nodes, dims);
if (hostlist_count(temp_hl) <= 0) {
@@ -918,6 +925,10 @@
if (local_cluster->end == 0) {
local_cluster->end = now;
(*curr_cluster) = local_cluster;
+ } else if (!(*curr_cluster)
+ || (((local_cluster_t *)(*curr_cluster))->end
+ < local_cluster->end)) {
+ (*curr_cluster) = local_cluster;
}
} else
_destroy_local_cluster(local_cluster);
@@ -978,16 +989,16 @@
return 1;
}
-extern char *setup_job_cluster_cond_limits(mysql_conn_t *mysql_conn,
- slurmdb_job_cond_t *job_cond,
- char *cluster_name, char **extra)
+extern int setup_job_cluster_cond_limits(mysql_conn_t *mysql_conn,
+ slurmdb_job_cond_t *job_cond,
+ char *cluster_name, char **extra)
{
int set = 0;
ListIterator itr = NULL;
char *object = NULL;
if (!job_cond)
- return NULL;
+ return SLURM_SUCCESS;
/* this must be done before resvid_list since we set
resvid_list up here */
@@ -1041,6 +1052,78 @@
xstrcat(*extra, ")");
}
+ if (job_cond->state_list && list_count(job_cond->state_list)) {
+ itr = list_iterator_create(job_cond->state_list);
+ while ((object = list_next(itr))) {
+ uint32_t state = (uint32_t)slurm_atoul(object);
+ state &= JOB_STATE_BASE;
+ if (state == JOB_SUSPENDED)
+ break;
+ }
+ list_iterator_destroy(itr);
+
+ if (object) {
+ MYSQL_RES *result = NULL;
+ MYSQL_ROW row;
+ char *query = xstrdup_printf(
+ "select job_db_inx from \"%s_%s\"",
+ cluster_name, suspend_table);
+ if (job_cond->usage_start) {
+ if (!job_cond->usage_end) {
+ xstrfmtcat(query,
+ " where (!time_end "
+ "|| (%d between "
+ "time_start and time_end))",
+ (int)job_cond->usage_start);
+ } else {
+ xstrfmtcat(query,
+ " where (!time_end "
+ "|| (time_start && "
+ "((%d between time_start "
+ "and time_end) "
+ "|| (time_start between "
+ "%d and %d))))",
+ (int)job_cond->usage_start,
+ (int)job_cond->usage_start,
+ (int)job_cond->usage_end);
+ }
+ } else if (job_cond->usage_end) {
+ xstrfmtcat(query, " where (time_start && "
+ "time_start < %d)",
+ (int)job_cond->usage_end);
+ }
+
+ debug3("%d(%s:%d) query\n%s",
+ mysql_conn->conn, THIS_FILE, __LINE__, query);
+ result = mysql_db_query_ret(mysql_conn, query, 0);
+ xfree(query);
+ if (!result)
+ return SLURM_ERROR;
+ set = 0;
+ while ((row = mysql_fetch_row(result))) {
+ if (set)
+ xstrfmtcat(*extra,
+ " || t1.job_db_inx=%s",
+ row[0]);
+ else {
+ set = 1;
+ if (*extra)
+ xstrfmtcat(
+ *extra,
+ " || (t1.job_db_inx=%s",
+ row[0]);
+ else
+ xstrfmtcat(*extra, " where "
+ "(t1.job_db_inx=%s",
+ row[0]);
+ }
+ }
+ mysql_free_result(result);
+ if (set)
+ xstrcat(*extra, ")");
+ }
+ }
+
return SLURM_SUCCESS;
}
@@ -1125,6 +1208,24 @@
xstrcat(*extra, ")");
}
+ if (job_cond->jobname_list && list_count(job_cond->jobname_list)) {
+ set = 0;
+ if (*extra)
+ xstrcat(*extra, " && (");
+ else
+ xstrcat(*extra, " where (");
+
+ itr = list_iterator_create(job_cond->jobname_list);
+ while ((object = list_next(itr))) {
+ if (set)
+ xstrcat(*extra, " || ");
+ xstrfmtcat(*extra, "t1.job_name='%s'", object);
+ set = 1;
+ }
+ list_iterator_destroy(itr);
+ xstrcat(*extra, ")");
+ }
+
if (job_cond->partition_list && list_count(job_cond->partition_list)) {
set = 0;
if (*extra)
@@ -1184,11 +1285,11 @@
xstrcat(*extra, " where (");
if (job_cond->cpus_max) {
- xstrfmtcat(*extra, "(t1.alloc_cpus between %u and %u))",
+ xstrfmtcat(*extra, "(t1.cpus_alloc between %u and %u))",
job_cond->cpus_min, job_cond->cpus_max);
} else {
- xstrfmtcat(*extra, "(t1.alloc_cpus='%u'))",
+ xstrfmtcat(*extra, "(t1.cpus_alloc='%u'))",
job_cond->cpus_min);
}
@@ -1202,11 +1303,11 @@
if (job_cond->nodes_max) {
xstrfmtcat(*extra,
- "(t1.alloc_nodes between %u and %u))",
+ "(t1.nodes_alloc between %u and %u))",
job_cond->nodes_min, job_cond->nodes_max);
} else {
- xstrfmtcat(*extra, "(t1.alloc_nodes='%u'))",
+ xstrfmtcat(*extra, "(t1.nodes_alloc='%u'))",
job_cond->nodes_min);
}
@@ -1241,6 +1342,7 @@
while ((object = list_next(itr))) {
if (set)
xstrcat(*extra, " || ");
+
_state_time_string(extra, (uint32_t)slurm_atoul(object),
job_cond->usage_start,
job_cond->usage_end);
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.h b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.h
index 47cdd3b..719d023 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.h
+++ b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.h
@@ -52,9 +52,9 @@
extern int good_nodes_from_inx(List local_cluster_list,
void **object, char *node_inx,
int submit);
-extern char *setup_job_cluster_cond_limits(mysql_conn_t *mysql_conn,
- slurmdb_job_cond_t *job_cond,
- char *cluster_name, char **extra);
+extern int setup_job_cluster_cond_limits(mysql_conn_t *mysql_conn,
+ slurmdb_job_cond_t *job_cond,
+ char *cluster_name, char **extra);
extern int setup_job_cond_limits(mysql_conn_t *mysql_conn,
slurmdb_job_cond_t *job_cond,
char **extra);
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_qos.c b/src/plugins/accounting_storage/mysql/as_mysql_qos.c
index 3102e90..73e2a48 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_qos.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_qos.c
@@ -102,6 +102,8 @@
qos->grp_cpus = INFINITE;
if (qos->grp_jobs == NO_VAL)
qos->grp_jobs = INFINITE;
+ if (qos->grp_mem == NO_VAL)
+ qos->grp_mem = INFINITE;
if (qos->grp_nodes == NO_VAL)
qos->grp_nodes = INFINITE;
if (qos->grp_submit_jobs == NO_VAL)
@@ -223,6 +225,17 @@
xstrfmtcat(*extra, ", grp_jobs=%u", qos->grp_jobs);
}
+ if (qos->grp_mem == INFINITE) {
+ xstrcat(*cols, ", grp_mem");
+ xstrcat(*vals, ", NULL");
+ xstrcat(*extra, ", grp_mem=NULL");
+ } else if ((qos->grp_mem != NO_VAL)
+ && ((int32_t)qos->grp_mem >= 0)) {
+ xstrcat(*cols, ", grp_mem");
+ xstrfmtcat(*vals, ", %u", qos->grp_mem);
+ xstrfmtcat(*extra, ", grp_mem=%u", qos->grp_mem);
+ }
+
if (qos->grp_nodes == INFINITE) {
xstrcat(*cols, ", grp_nodes");
xstrcat(*vals, ", NULL");
@@ -686,6 +699,7 @@
qos_rec->grp_cpu_mins = qos->grp_cpu_mins;
qos_rec->grp_cpu_run_mins = qos->grp_cpu_run_mins;
qos_rec->grp_jobs = qos->grp_jobs;
+ qos_rec->grp_mem = qos->grp_mem;
qos_rec->grp_nodes = qos->grp_nodes;
qos_rec->grp_submit_jobs = qos->grp_submit_jobs;
qos_rec->grp_wall = qos->grp_wall;
@@ -971,6 +985,7 @@
"grp_cpu_run_mins",
"grp_cpus",
"grp_jobs",
+ "grp_mem",
"grp_nodes",
"grp_submit_jobs",
"grp_wall",
@@ -999,6 +1014,7 @@
QOS_REQ_GCRM,
QOS_REQ_GC,
QOS_REQ_GJ,
+ QOS_REQ_GMEM,
QOS_REQ_GN,
QOS_REQ_GSJ,
QOS_REQ_GW,
@@ -1134,6 +1150,10 @@
qos->grp_jobs = slurm_atoul(row[QOS_REQ_GJ]);
else
qos->grp_jobs = INFINITE;
+ if (row[QOS_REQ_GMEM])
+ qos->grp_mem = slurm_atoul(row[QOS_REQ_GMEM]);
+ else
+ qos->grp_mem = INFINITE;
if (row[QOS_REQ_GN])
qos->grp_nodes = slurm_atoul(row[QOS_REQ_GN]);
else
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c
index d334628..3437cda 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c
@@ -716,7 +716,7 @@
seconds = (row_end - row_start);
- if (row[JOB_REQ_SUSPENDED]) {
+ if (slurm_atoul(row[JOB_REQ_SUSPENDED])) {
MYSQL_RES *result2 = NULL;
MYSQL_ROW row2;
/* get the suspended time for this job */
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_user.c b/src/plugins/accounting_storage/mysql/as_mysql_user.c
index a4fb90c..7c53e0f 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_user.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_user.c
@@ -1244,6 +1244,8 @@
if (!user_cond->assoc_cond->user_list)
user_cond->assoc_cond->user_list = list_create(NULL);
+ user_cond->assoc_cond->with_deleted = user_cond->with_deleted;
+
assoc_list = as_mysql_get_assocs(
mysql_conn, uid, user_cond->assoc_cond);
diff --git a/src/plugins/accounting_storage/none/Makefile.in b/src/plugins/accounting_storage/none/Makefile.in
index 624aff6..c3da188 100644
--- a/src/plugins/accounting_storage/none/Makefile.in
+++ b/src/plugins/accounting_storage/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
accounting_storage_none_la_LIBADD =
@@ -210,6 +216,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -246,6 +253,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-accounting_storage_none.la: $(accounting_storage_none_la_OBJECTS) $(accounting_storage_none_la_DEPENDENCIES)
+accounting_storage_none.la: $(accounting_storage_none_la_OBJECTS) $(accounting_storage_none_la_DEPENDENCIES) $(EXTRA_accounting_storage_none_la_DEPENDENCIES)
$(accounting_storage_none_la_LINK) -rpath $(pkglibdir) $(accounting_storage_none_la_OBJECTS) $(accounting_storage_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/accounting_storage/none/accounting_storage_none.c b/src/plugins/accounting_storage/none/accounting_storage_none.c
index 877bd4f..53d175f 100644
--- a/src/plugins/accounting_storage/none/accounting_storage_none.c
+++ b/src/plugins/accounting_storage/none/accounting_storage_none.c
@@ -275,7 +275,7 @@
return NULL;
}
-extern List acct_storage_p_get_config(void *db_conn)
+extern List acct_storage_p_get_config(void *db_conn, char *config_name)
{
return NULL;
}
diff --git a/src/plugins/accounting_storage/pgsql/Makefile.am b/src/plugins/accounting_storage/pgsql/Makefile.am
index 8597055..d8b11af 100644
--- a/src/plugins/accounting_storage/pgsql/Makefile.am
+++ b/src/plugins/accounting_storage/pgsql/Makefile.am
@@ -13,7 +13,7 @@
as_pg_assoc.c as_pg_assoc.h \
as_pg_cluster.c as_pg_cluster.h \
as_pg_event.c as_pg_event.h \
- as_pg_get_jobs.c as_pg_get_jobs.h \
+ as_pg_get_jobs.c \
as_pg_job.c as_pg_job.h \
as_pg_problem.c as_pg_problem.h \
as_pg_qos.c as_pg_qos.h \
diff --git a/src/plugins/accounting_storage/pgsql/Makefile.in b/src/plugins/accounting_storage/pgsql/Makefile.in
index 66c7925..14b16f8 100644
--- a/src/plugins/accounting_storage/pgsql/Makefile.in
+++ b/src/plugins/accounting_storage/pgsql/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -116,11 +122,11 @@
as_pg_common.c as_pg_common.h as_pg_acct.c as_pg_acct.h \
as_pg_archive.c as_pg_archive.h as_pg_assoc.c as_pg_assoc.h \
as_pg_cluster.c as_pg_cluster.h as_pg_event.c as_pg_event.h \
- as_pg_get_jobs.c as_pg_get_jobs.h as_pg_job.c as_pg_job.h \
- as_pg_problem.c as_pg_problem.h as_pg_qos.c as_pg_qos.h \
- as_pg_resv.c as_pg_resv.h as_pg_rollup.c as_pg_rollup.h \
- as_pg_txn.c as_pg_txn.h as_pg_usage.c as_pg_usage.h \
- as_pg_user.c as_pg_user.h as_pg_wckey.c as_pg_wckey.h
+ as_pg_get_jobs.c as_pg_job.c as_pg_job.h as_pg_problem.c \
+ as_pg_problem.h as_pg_qos.c as_pg_qos.h as_pg_resv.c \
+ as_pg_resv.h as_pg_rollup.c as_pg_rollup.h as_pg_txn.c \
+ as_pg_txn.h as_pg_usage.c as_pg_usage.h as_pg_user.c \
+ as_pg_user.h as_pg_wckey.c as_pg_wckey.h
am__objects_1 = \
accounting_storage_pgsql_la-accounting_storage_pgsql.lo \
accounting_storage_pgsql_la-as_pg_common.lo \
@@ -146,11 +152,11 @@
as_pg_common.c as_pg_common.h as_pg_acct.c as_pg_acct.h \
as_pg_archive.c as_pg_archive.h as_pg_assoc.c as_pg_assoc.h \
as_pg_cluster.c as_pg_cluster.h as_pg_event.c as_pg_event.h \
- as_pg_get_jobs.c as_pg_get_jobs.h as_pg_job.c as_pg_job.h \
- as_pg_problem.c as_pg_problem.h as_pg_qos.c as_pg_qos.h \
- as_pg_resv.c as_pg_resv.h as_pg_rollup.c as_pg_rollup.h \
- as_pg_txn.c as_pg_txn.h as_pg_usage.c as_pg_usage.h \
- as_pg_user.c as_pg_user.h as_pg_wckey.c as_pg_wckey.h
+ as_pg_get_jobs.c as_pg_job.c as_pg_job.h as_pg_problem.c \
+ as_pg_problem.h as_pg_qos.c as_pg_qos.h as_pg_resv.c \
+ as_pg_resv.h as_pg_rollup.c as_pg_rollup.h as_pg_txn.c \
+ as_pg_txn.h as_pg_usage.c as_pg_usage.h as_pg_user.c \
+ as_pg_user.h as_pg_wckey.c as_pg_wckey.h
accounting_storage_pgsql_la_OBJECTS = \
$(am_accounting_storage_pgsql_la_OBJECTS)
accounting_storage_pgsql_la_LINK = $(LIBTOOL) --tag=CC \
@@ -256,6 +262,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -292,6 +299,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -389,7 +397,7 @@
as_pg_assoc.c as_pg_assoc.h \
as_pg_cluster.c as_pg_cluster.h \
as_pg_event.c as_pg_event.h \
- as_pg_get_jobs.c as_pg_get_jobs.h \
+ as_pg_get_jobs.c \
as_pg_job.c as_pg_job.h \
as_pg_problem.c as_pg_problem.h \
as_pg_qos.c as_pg_qos.h \
@@ -476,7 +484,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-accounting_storage_pgsql.la: $(accounting_storage_pgsql_la_OBJECTS) $(accounting_storage_pgsql_la_DEPENDENCIES)
+accounting_storage_pgsql.la: $(accounting_storage_pgsql_la_OBJECTS) $(accounting_storage_pgsql_la_DEPENDENCIES) $(EXTRA_accounting_storage_pgsql_la_DEPENDENCIES)
$(accounting_storage_pgsql_la_LINK) $(am_accounting_storage_pgsql_la_rpath) $(accounting_storage_pgsql_la_OBJECTS) $(accounting_storage_pgsql_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -748,10 +756,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
index 9c52aa7..975a08b 100644
--- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
+++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
@@ -498,7 +498,7 @@
return as_pg_get_clusters(pg_conn, uid, cluster_cond);
}
-extern List acct_storage_p_get_config(pgsql_conn_t *pg_conn)
+extern List acct_storage_p_get_config(pgsql_conn_t *pg_conn, char *config_name)
{
return NULL;
}
diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h
index 66cb201..308ac89 100644
--- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h
+++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h
@@ -122,7 +122,7 @@
slurmdb_account_cond_t *acct_cond);
extern List acct_storage_p_get_clusters(pgsql_conn_t *pg_conn, uid_t uid,
slurmdb_cluster_cond_t *cluster_cond);
-extern List acct_storage_p_get_config(pgsql_conn_t *pg_conn);
+extern List acct_storage_p_get_config(pgsql_conn_t *pg_conn, char *config_name);
extern List acct_storage_p_get_associations(pgsql_conn_t *pg_conn, uid_t uid,
slurmdb_association_cond_t *assoc_cond);
extern List acct_storage_p_get_problems(pgsql_conn_t *pg_conn, uid_t uid,
diff --git a/src/plugins/accounting_storage/pgsql/as_pg_assoc.c b/src/plugins/accounting_storage/pgsql/as_pg_assoc.c
index 3f8ec16..523aaad 100644
--- a/src/plugins/accounting_storage/pgsql/as_pg_assoc.c
+++ b/src/plugins/accounting_storage/pgsql/as_pg_assoc.c
@@ -67,6 +67,7 @@
{ "grp_jobs", "INTEGER DEFAULT NULL" },
{ "grp_submit_jobs", "INTEGER DEFAULT NULL" },
{ "grp_cpus", "INTEGER DEFAULT NULL" },
+ { "grp_mem", "INTEGER DEFAULT NULL" },
{ "grp_nodes", "INTEGER DEFAULT NULL" },
{ "grp_wall", "INTEGER DEFAULT NULL" },
{ "grp_cpu_mins", "BIGINT DEFAULT NULL" },
@@ -250,7 +251,7 @@
" lft, rgt, shares, max_jobs, max_submit_jobs, "
" max_cpus_pj, max_nodes_pj, "
" max_wall_pj, max_cpu_mins_pj, "
- " grp_jobs, grp_submit_jobs, grp_cpus, grp_nodes, "
+ " grp_jobs, grp_submit_jobs, grp_cpus, grp_mem, grp_nodes, "
" grp_wall, grp_cpu_mins, qos, delta_qos) "
" VALUES (na.creation_time, na.mod_time, na.deleted, "
" DEFAULT, na.acct, na.user_name,"
@@ -259,7 +260,7 @@
" na.max_cpus_pj, na.max_nodes_pj, "
" na.max_wall_pj, "
" na.max_cpu_mins_pj, na.grp_jobs, "
- " na.grp_submit_jobs, na.grp_cpus, na.grp_nodes, "
+ " na.grp_submit_jobs, na.grp_cpus, na.grp_mem, na.grp_nodes, "
" na.grp_wall, na.grp_cpu_mins, na.qos, na.delta_qos) "
" RETURNING id_assoc INTO na_id;"
" RETURN na_id; "
@@ -275,7 +276,7 @@
" max_cpu_mins_pj=na.max_cpu_mins_pj, "
" grp_jobs=na.grp_jobs, "
" grp_submit_jobs=na.grp_submit_jobs, "
- " grp_cpus=na.grp_cpus, grp_nodes=na.grp_nodes, "
+ " grp_cpus=na.grp_cpus, grp_mem=na.grp_mem, grp_nodes=na.grp_nodes, "
" grp_wall=na.grp_wall, grp_cpu_mins=na.grp_cpu_mins, "
" qos=na.qos, delta_qos=na.delta_qos "
" WHERE acct=na.acct AND "
@@ -313,7 +314,7 @@
" max_cpu_mins_pj=assoc.max_cpu_mins_pj, "
" grp_jobs=assoc.grp_jobs, "
" grp_submit_jobs=assoc.grp_submit_jobs, "
- " grp_cpus=assoc.grp_cpus, grp_nodes=assoc.grp_nodes, "
+ " grp_cpus=assoc.grp_cpus, grp_mem=assoc.grp_mem, grp_nodes=assoc.grp_nodes, "
" grp_wall=assoc.grp_wall, grp_cpu_mins=assoc.grp_cpu_mins, "
" qos=assoc.qos, delta_qos=assoc.delta_qos "
" WHERE acct=assoc.acct AND "
@@ -544,6 +545,7 @@
concat_limit_32("grp_jobs", assoc->grp_jobs, rec, txn);
concat_limit_32("grp_submit_jobs", assoc->grp_submit_jobs, rec, txn);
concat_limit_32("grp_cpus", assoc->grp_cpus, rec, txn);
+ concat_limit_32("grp_mem", assoc->grp_mem, rec, txn);
concat_limit_32("grp_nodes", assoc->grp_nodes, rec, txn);
concat_limit_32("grp_wall", assoc->grp_wall, rec, txn);
concat_limit_64("grp_cpu_mins", assoc->grp_cpu_mins, rec, txn);
@@ -625,6 +627,7 @@
"NULL, " /* grp_jobs */
"NULL, " /* grp_submit_jobs */
"NULL, " /* grp_cpus */
+ "NULL, " /* grp_mem */
"NULL, " /* grp_nodes */
"NULL, " /* grp_wall */
"NULL, " /* grp_cpu_mins */
@@ -658,6 +661,7 @@
concat_limit_32("grp_submit_jobs",
ra->grp_submit_jobs, rec, txn);
concat_limit_32("grp_cpus", ra->grp_cpus, rec, txn);
+ concat_limit_32("grp_mem", ra->grp_mem, rec, txn);
concat_limit_32("grp_nodes", ra->grp_nodes, rec, txn);
concat_limit_32("grp_wall", ra->grp_wall, rec, txn);
concat_limit_64("grp_cpu_mins", ra->grp_cpu_mins, rec, txn);
@@ -963,6 +967,8 @@
prefix, "grp_cpus", &cond);
concat_cond_list(assoc_cond->grp_jobs_list,
prefix, "grp_jobs", &cond);
+ concat_cond_list(assoc_cond->grp_mem_list,
+ prefix, "grp_mem", &cond);
concat_cond_list(assoc_cond->grp_nodes_list,
prefix, "grp_nodes", &cond);
concat_cond_list(assoc_cond->grp_submit_jobs_list,
@@ -1032,6 +1038,7 @@
&tmp, vals);
concat_limit_32("grp_cpus", assoc->grp_cpus, &tmp, vals);
concat_limit_32("grp_jobs", assoc->grp_jobs, &tmp, vals);
+ concat_limit_32("grp_mem", assoc->grp_mem, &tmp, vals);
concat_limit_32("grp_nodes", assoc->grp_nodes, &tmp, vals);
concat_limit_32("grp_submit_jobs", assoc->grp_submit_jobs, &tmp, vals);
concat_limit_32("grp_wall", assoc->grp_wall, &tmp, vals);
@@ -1063,6 +1070,7 @@
dest->grp_cpu_mins = src->grp_cpu_mins;
dest->grp_cpu_run_mins = src->grp_cpu_run_mins;
dest->grp_jobs = src->grp_jobs;
+ dest->grp_mem = src->grp_mem;
dest->grp_nodes = src->grp_nodes;
dest->grp_submit_jobs = src->grp_submit_jobs;
dest->grp_wall = src->grp_wall;
@@ -1489,7 +1497,7 @@
/* if this changes you will need to edit the corresponding enum */
char *ga_fields = "t1.id_assoc, t1.lft, t1.rgt, t1.user_name, t1.acct,"
"t1.partition, t1.shares, t1.grp_cpu_mins, t1.grp_cpu_run_mins,"
- "t1.grp_cpus, t1.grp_jobs, t1.grp_nodes, t1.grp_submit_jobs,"
+ "t1.grp_cpus, t1.grp_jobs, t1.grp_mem, t1.grp_nodes, t1.grp_submit_jobs,"
"t1.grp_wall, t1.max_cpu_mins_pj, t1.max_cpu_run_mins, "
"t1.max_cpus_pj, t1.max_jobs, t1.max_nodes_pj, "
"t1.max_submit_jobs, t1.max_wall_pj, t1.parent_acct, "
@@ -1506,6 +1514,7 @@
F_GCRM,
F_GC,
F_GJ,
+ F_GMEM,
F_GN,
F_GSJ,
F_GW,
@@ -1567,6 +1576,7 @@
assoc->grp_jobs = ISNULL(F_GJ) ? INFINITE : atoi(ROW(F_GJ));
assoc->grp_cpus = ISNULL(F_GC) ? INFINITE : atoi(ROW(F_GC));
+ assoc->grp_mem = ISNULL(F_GMEM) ? INFINITE : atoi(ROW(F_GMEM));
assoc->grp_nodes = ISNULL(F_GN) ? INFINITE : atoi(ROW(F_GN));
assoc->grp_wall = ISNULL(F_GW) ? INFINITE : atoll(ROW(F_GW));
assoc->grp_submit_jobs = ISNULL(F_GSJ) ? INFINITE : atoi(ROW(F_GSJ));
diff --git a/src/plugins/accounting_storage/pgsql/as_pg_qos.c b/src/plugins/accounting_storage/pgsql/as_pg_qos.c
index 4cad080..a78d527 100644
--- a/src/plugins/accounting_storage/pgsql/as_pg_qos.c
+++ b/src/plugins/accounting_storage/pgsql/as_pg_qos.c
@@ -60,6 +60,7 @@
{ "grp_jobs", "INTEGER DEFAULT NULL" },
{ "grp_submit_jobs", "INTEGER DEFAULT NULL" },
{ "grp_cpus", "INTEGER DEFAULT NULL" },
+ { "grp_mem", "INTEGER DEFAULT NULL" },
{ "grp_nodes", "INTEGER DEFAULT NULL" },
{ "grp_wall", "INTEGER DEFAULT NULL" },
{ "grp_cpu_mins", "BIGINT DEFAULT NULL" },
@@ -96,7 +97,7 @@
" max_submit_jobs_per_user, max_cpus_per_job, "
" max_nodes_per_job, max_wall_duration_per_job, "
" max_cpu_mins_per_job, max_cpu_run_mins_per_user, "
- " grp_jobs, grp_submit_jobs, grp_cpus, grp_nodes, "
+ " grp_jobs, grp_submit_jobs, grp_cpus, grp_mem, grp_nodes, "
" grp_wall, grp_cpu_mins, grp_cpu_run_mins, preempt, "
" preempt_mode, priority, usage_factor) "
" VALUES (rec.creation_time, rec.mod_time, "
@@ -107,7 +108,7 @@
" rec.max_wall_duration_per_job, "
" rec.max_cpu_mins_per_job, "
" rec.max_cpu_run_mins_per_user, "
- " rec.grp_jobs, rec.grp_submit_jobs, rec.grp_cpus, "
+ " rec.grp_jobs, rec.grp_submit_jobs, rec.grp_cpus, rec.grp_mem, "
" rec.grp_nodes, rec.grp_wall, rec.grp_cpu_mins, "
" rec.grp_cpu_run_mins, rec.preempt, rec.preempt_mode, "
" rec.priority, rec.usage_factor) "
@@ -119,7 +120,7 @@
" max_submit_jobs_per_user, max_cpus_per_job, "
" max_nodes_per_job, max_wall_duration_per_job, "
" max_cpu_mins_per_job, max_cpu_run_mins_per_user, "
- " grp_jobs, grp_submit_jobs, grp_cpus, grp_nodes, "
+ " grp_jobs, grp_submit_jobs, grp_cpus, grp_mem, grp_nodes, "
" grp_wall, grp_cpu_mins, grp_cpu_run_mins, "
" preempt, preempt_mode, priority, usage_factor) = "
" (0, rec.mod_time, rec.description, "
@@ -129,7 +130,7 @@
" rec.max_wall_duration_per_job, "
" rec.max_cpu_mins_per_job, "
" rec.max_cpu_run_mins_per_user, "
- " rec.grp_jobs, rec.grp_submit_jobs, rec.grp_cpus, "
+ " rec.grp_jobs, rec.grp_submit_jobs, rec.grp_cpus, rec.grp_mem, "
" rec.grp_nodes, rec.grp_wall, rec.grp_cpu_mins, "
" rec.grp_cpu_run_mins, rec.preempt, rec.preempt_mode, "
" rec.priority, rec.usage_factor) "
@@ -176,6 +177,7 @@
concat_limit_32("grp_jobs", object->grp_jobs, rec, txn);
concat_limit_32("grp_submit_jobs", object->grp_submit_jobs, rec, txn);
concat_limit_32("grp_cpus", object->grp_cpus, rec, txn);
+ concat_limit_32("grp_mem", object->grp_mem, rec, txn);
concat_limit_32("grp_nodes", object->grp_nodes, rec, txn);
concat_limit_32("grp_wall", object->grp_wall, rec, txn);
concat_limit_64("grp_cpu_mins", object->grp_cpu_mins, rec, txn);
@@ -287,6 +289,7 @@
concat_limit_32("grp_jobs", qos->grp_jobs, NULL, vals);
concat_limit_32("grp_submit_jobs", qos->grp_submit_jobs, NULL, vals);
concat_limit_32("grp_cpus", qos->grp_cpus, NULL, vals);
+ concat_limit_32("grp_mem", qos->grp_mem, NULL, vals);
concat_limit_32("grp_nodes", qos->grp_nodes, NULL, vals);
concat_limit_32("grp_wall", qos->grp_wall, NULL, vals);
concat_limit_64("grp_cpu_mins", qos->grp_cpu_mins, NULL, vals);
@@ -626,6 +629,7 @@
qos_rec->grp_cpu_mins = qos->grp_cpu_mins;
qos_rec->grp_cpu_run_mins = qos->grp_cpu_run_mins;
qos_rec->grp_jobs = qos->grp_jobs;
+ qos_rec->grp_mem = qos->grp_mem;
qos_rec->grp_nodes = qos->grp_nodes;
qos_rec->grp_submit_jobs = qos->grp_submit_jobs;
qos_rec->grp_wall = qos->grp_wall;
@@ -868,7 +872,7 @@
/* if this changes you will need to edit the corresponding enum */
char *gq_fields = "name,description,id_qos,grp_cpu_mins,"
- "grp_cpu_run_mins,grp_cpus,grp_jobs,grp_nodes,grp_submit_jobs,"
+ "grp_cpu_run_mins,grp_cpus,grp_jobs,grp_mem,grp_nodes,grp_submit_jobs,"
"grp_wall,max_cpu_mins_per_job,max_cpu_run_mins_per_user,"
"max_cpus_per_job,max_jobs_per_user,max_nodes_per_job,"
"max_submit_jobs_per_user,max_wall_duration_per_job,preempt,"
@@ -881,6 +885,7 @@
F_GCRM,
F_GC,
F_GJ,
+ F_GMEM,
F_GN,
F_GSJ,
F_GW,
@@ -952,6 +957,10 @@
qos->grp_jobs = atoi(ROW(F_GJ));
else
qos->grp_jobs = INFINITE;
+ if(! ISNULL(F_GMEM))
+ qos->grp_mem = atoi(ROW(F_GMEM));
+ else
+ qos->grp_mem = INFINITE;
if(! ISNULL(F_GN))
qos->grp_nodes = atoi(ROW(F_GN));
else
diff --git a/src/plugins/accounting_storage/slurmdbd/Makefile.in b/src/plugins/accounting_storage/slurmdbd/Makefile.in
index f16b6cd..c5b30a4 100644
--- a/src/plugins/accounting_storage/slurmdbd/Makefile.in
+++ b/src/plugins/accounting_storage/slurmdbd/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
accounting_storage_slurmdbd_la_LIBADD =
@@ -211,6 +217,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -247,6 +254,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -407,7 +415,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-accounting_storage_slurmdbd.la: $(accounting_storage_slurmdbd_la_OBJECTS) $(accounting_storage_slurmdbd_la_DEPENDENCIES)
+accounting_storage_slurmdbd.la: $(accounting_storage_slurmdbd_la_OBJECTS) $(accounting_storage_slurmdbd_la_DEPENDENCIES) $(EXTRA_accounting_storage_slurmdbd_la_DEPENDENCIES)
$(accounting_storage_slurmdbd_la_LINK) -rpath $(pkglibdir) $(accounting_storage_slurmdbd_la_OBJECTS) $(accounting_storage_slurmdbd_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -544,10 +552,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
index 6b26b27..048d013 100644
--- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
+++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
@@ -277,6 +277,11 @@
local_job_list = list_create(
_partial_destroy_dbd_job_start);
list_append(local_job_list, req);
+ /* Just so we don't have a crazy
+ amount of messages at once.
+ */
+ if (list_count(local_job_list) > 1000)
+ break;
}
}
list_iterator_destroy(itr);
@@ -286,6 +291,7 @@
slurmdbd_msg_t req, resp;
dbd_list_msg_t send_msg, *got_msg;
int rc = SLURM_SUCCESS;
+ bool reset = 0;
memset(&send_msg, 0, sizeof(dbd_list_msg_t));
@@ -296,20 +302,23 @@
rc = slurm_send_recv_slurmdbd_msg(
SLURMDBD_VERSION, &req, &resp);
list_destroy(local_job_list);
- if (rc != SLURM_SUCCESS)
+ if (rc != SLURM_SUCCESS) {
error("slurmdbd: DBD_SEND_MULT_JOB_START "
"failure: %m");
- else if (resp.msg_type == DBD_RC) {
+ reset = 1;
+ } else if (resp.msg_type == DBD_RC) {
dbd_rc_msg_t *msg = resp.data;
if (msg->return_code == SLURM_SUCCESS) {
info("%s", msg->comment);
} else
error("%s", msg->comment);
slurmdbd_free_rc_msg(msg);
+ reset = 1;
} else if (resp.msg_type != DBD_GOT_MULT_JOB_START) {
error("slurmdbd: response type not "
"DBD_GOT_MULT_JOB_START: %u",
resp.msg_type);
+ reset = 1;
} else {
dbd_id_rc_msg_t *id_ptr = NULL;
got_msg = (dbd_list_msg_t *) resp.data;
@@ -326,6 +335,19 @@
slurmdbd_free_list_msg(got_msg);
}
+
+ if (reset) {
+ lock_slurmctld(job_read_lock);
+ /* USE READ LOCK, SEE ABOVE on first
+ * read lock */
+ itr = list_iterator_create(job_list);
+ while ((job_ptr = list_next(itr))) {
+ if (job_ptr->db_index == NO_VAL)
+ job_ptr->db_index = 0;
+ }
+ list_iterator_destroy(itr);
+ unlock_slurmctld(job_read_lock);
+ }
}
running_db_inx = 0;
@@ -1288,7 +1310,7 @@
rc = slurm_send_slurmdbd_recv_rc_msg(SLURMDBD_VERSION,
&req, &resp_code);
- if (resp_code != SLURM_SUCCESS)
+ if ((rc == SLURM_SUCCESS) && (resp_code != SLURM_SUCCESS))
rc = resp_code;
return rc;
@@ -1419,7 +1441,7 @@
return ret_list;
}
-extern List acct_storage_p_get_config(void *db_conn)
+extern List acct_storage_p_get_config(void *db_conn, char *config_name)
{
slurmdbd_msg_t req, resp;
dbd_list_msg_t *got_msg;
@@ -1427,7 +1449,7 @@
List ret_list = NULL;
req.msg_type = DBD_GET_CONFIG;
- req.data = NULL;
+ req.data = config_name;
rc = slurm_send_recv_slurmdbd_msg(SLURMDBD_VERSION, &req, &resp);
if (rc != SLURM_SUCCESS)
diff --git a/src/plugins/auth/Makefile.in b/src/plugins/auth/Makefile.in
index f169f11..5d89934 100644
--- a/src/plugins/auth/Makefile.in
+++ b/src/plugins/auth/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/auth/authd/Makefile.in b/src/plugins/auth/authd/Makefile.in
index e469d94..deff9ce 100644
--- a/src/plugins/auth/authd/Makefile.in
+++ b/src/plugins/auth/authd/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
auth_authd_la_DEPENDENCIES =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -409,7 +417,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-auth_authd.la: $(auth_authd_la_OBJECTS) $(auth_authd_la_DEPENDENCIES)
+auth_authd.la: $(auth_authd_la_OBJECTS) $(auth_authd_la_DEPENDENCIES) $(EXTRA_auth_authd_la_DEPENDENCIES)
$(auth_authd_la_LINK) $(am_auth_authd_la_rpath) $(auth_authd_la_OBJECTS) $(auth_authd_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -546,10 +554,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/auth/munge/Makefile.in b/src/plugins/auth/munge/Makefile.in
index dad5541..1814140 100644
--- a/src/plugins/auth/munge/Makefile.in
+++ b/src/plugins/auth/munge/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -210,6 +216,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -246,6 +253,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -410,7 +418,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-auth_munge.la: $(auth_munge_la_OBJECTS) $(auth_munge_la_DEPENDENCIES)
+auth_munge.la: $(auth_munge_la_OBJECTS) $(auth_munge_la_DEPENDENCIES) $(EXTRA_auth_munge_la_DEPENDENCIES)
$(auth_munge_la_LINK) $(am_auth_munge_la_rpath) $(auth_munge_la_OBJECTS) $(auth_munge_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -547,10 +555,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/auth/munge/auth_munge.c b/src/plugins/auth/munge/auth_munge.c
index 7100642..767a708 100644
--- a/src/plugins/auth/munge/auth_munge.c
+++ b/src/plugins/auth/munge/auth_munge.c
@@ -101,7 +101,7 @@
* messages that this plugin can accept
*/
const char plugin_name[] = "auth plugin for Munge "
- "(http://home.gna.org/munge/)";
+ "(http://code.google.com/p/munge/)";
const char plugin_type[] = "auth/munge";
const uint32_t plugin_version = 10;
const uint32_t min_plug_version = 10; /* minimum version accepted */
diff --git a/src/plugins/auth/none/Makefile.in b/src/plugins/auth/none/Makefile.in
index 202b864..6ca5e01 100644
--- a/src/plugins/auth/none/Makefile.in
+++ b/src/plugins/auth/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
auth_none_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-auth_none.la: $(auth_none_la_OBJECTS) $(auth_none_la_DEPENDENCIES)
+auth_none.la: $(auth_none_la_OBJECTS) $(auth_none_la_DEPENDENCIES) $(EXTRA_auth_none_la_DEPENDENCIES)
$(auth_none_la_LINK) -rpath $(pkglibdir) $(auth_none_la_OBJECTS) $(auth_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/checkpoint/Makefile.in b/src/plugins/checkpoint/Makefile.in
index 82aaf5a..584c485 100644
--- a/src/plugins/checkpoint/Makefile.in
+++ b/src/plugins/checkpoint/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/checkpoint/aix/Makefile.in b/src/plugins/checkpoint/aix/Makefile.in
index 9a8679a..548d67e 100644
--- a/src/plugins/checkpoint/aix/Makefile.in
+++ b/src/plugins/checkpoint/aix/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
checkpoint_aix_la_LIBADD =
@@ -213,6 +219,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -249,6 +256,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -410,7 +418,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-checkpoint_aix.la: $(checkpoint_aix_la_OBJECTS) $(checkpoint_aix_la_DEPENDENCIES)
+checkpoint_aix.la: $(checkpoint_aix_la_OBJECTS) $(checkpoint_aix_la_DEPENDENCIES) $(EXTRA_checkpoint_aix_la_DEPENDENCIES)
$(checkpoint_aix_la_LINK) $(am_checkpoint_aix_la_rpath) $(checkpoint_aix_la_OBJECTS) $(checkpoint_aix_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -547,10 +555,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/checkpoint/blcr/Makefile.am b/src/plugins/checkpoint/blcr/Makefile.am
index 528d21d..f00b96b 100644
--- a/src/plugins/checkpoint/blcr/Makefile.am
+++ b/src/plugins/checkpoint/blcr/Makefile.am
@@ -21,4 +21,5 @@
@cd `dirname $@` && $(MAKE) `basename $@`
else
EXTRA_checkpoint_blcr_la_SOURCES = checkpoint_blcr.c
+EXTRA_DIST = cr_checkpoint.sh cr_restart.sh
endif
diff --git a/src/plugins/checkpoint/blcr/Makefile.in b/src/plugins/checkpoint/blcr/Makefile.in
index c517e75..8bf41c8 100644
--- a/src/plugins/checkpoint/blcr/Makefile.in
+++ b/src/plugins/checkpoint/blcr/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -108,6 +108,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)" \
"$(DESTDIR)$(pkglibexecdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
@@ -219,6 +225,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -255,6 +262,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -351,6 +359,7 @@
@WITH_BLCR_TRUE@checkpoint_blcr_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
@WITH_BLCR_TRUE@dist_pkglibexec_SCRIPTS = cr_checkpoint.sh cr_restart.sh
@WITH_BLCR_FALSE@EXTRA_checkpoint_blcr_la_SOURCES = checkpoint_blcr.c
+@WITH_BLCR_FALSE@EXTRA_DIST = cr_checkpoint.sh cr_restart.sh
all: all-am
.SUFFIXES:
@@ -420,7 +429,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-checkpoint_blcr.la: $(checkpoint_blcr_la_OBJECTS) $(checkpoint_blcr_la_DEPENDENCIES)
+checkpoint_blcr.la: $(checkpoint_blcr_la_OBJECTS) $(checkpoint_blcr_la_DEPENDENCIES) $(EXTRA_checkpoint_blcr_la_DEPENDENCIES)
$(checkpoint_blcr_la_LINK) $(am_checkpoint_blcr_la_rpath) $(checkpoint_blcr_la_OBJECTS) $(checkpoint_blcr_la_LIBADD) $(LIBS)
install-dist_pkglibexecSCRIPTS: $(dist_pkglibexec_SCRIPTS)
@$(NORMAL_INSTALL)
@@ -453,9 +462,7 @@
@list='$(dist_pkglibexec_SCRIPTS)'; test -n "$(pkglibexecdir)" || exit 0; \
files=`for p in $$list; do echo "$$p"; done | \
sed -e 's,.*/,,;$(transform)'`; \
- test -n "$$list" || exit 0; \
- echo " ( cd '$(DESTDIR)$(pkglibexecdir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(pkglibexecdir)" && rm -f $$files
+ dir='$(DESTDIR)$(pkglibexecdir)'; $(am__uninstall_files_from_dir)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -591,10 +598,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/checkpoint/none/Makefile.in b/src/plugins/checkpoint/none/Makefile.in
index e38baf8..7185ff1 100644
--- a/src/plugins/checkpoint/none/Makefile.in
+++ b/src/plugins/checkpoint/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
checkpoint_none_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-checkpoint_none.la: $(checkpoint_none_la_OBJECTS) $(checkpoint_none_la_DEPENDENCIES)
+checkpoint_none.la: $(checkpoint_none_la_OBJECTS) $(checkpoint_none_la_DEPENDENCIES) $(EXTRA_checkpoint_none_la_DEPENDENCIES)
$(checkpoint_none_la_LINK) -rpath $(pkglibdir) $(checkpoint_none_la_OBJECTS) $(checkpoint_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/checkpoint/ompi/Makefile.in b/src/plugins/checkpoint/ompi/Makefile.in
index 60dfd8a..a88ed8b 100644
--- a/src/plugins/checkpoint/ompi/Makefile.in
+++ b/src/plugins/checkpoint/ompi/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
checkpoint_ompi_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-checkpoint_ompi.la: $(checkpoint_ompi_la_OBJECTS) $(checkpoint_ompi_la_DEPENDENCIES)
+checkpoint_ompi.la: $(checkpoint_ompi_la_OBJECTS) $(checkpoint_ompi_la_DEPENDENCIES) $(EXTRA_checkpoint_ompi_la_DEPENDENCIES)
$(checkpoint_ompi_la_LINK) -rpath $(pkglibdir) $(checkpoint_ompi_la_OBJECTS) $(checkpoint_ompi_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/crypto/Makefile.in b/src/plugins/crypto/Makefile.in
index 7514c4b..a7aa6f7 100644
--- a/src/plugins/crypto/Makefile.in
+++ b/src/plugins/crypto/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/crypto/munge/Makefile.in b/src/plugins/crypto/munge/Makefile.in
index 9a900d4..366f317 100644
--- a/src/plugins/crypto/munge/Makefile.in
+++ b/src/plugins/crypto/munge/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -210,6 +216,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -246,6 +253,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -410,7 +418,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-crypto_munge.la: $(crypto_munge_la_OBJECTS) $(crypto_munge_la_DEPENDENCIES)
+crypto_munge.la: $(crypto_munge_la_OBJECTS) $(crypto_munge_la_DEPENDENCIES) $(EXTRA_crypto_munge_la_DEPENDENCIES)
$(crypto_munge_la_LINK) $(am_crypto_munge_la_rpath) $(crypto_munge_la_OBJECTS) $(crypto_munge_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -547,10 +555,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/crypto/munge/crypto_munge.c b/src/plugins/crypto/munge/crypto_munge.c
index 12bf2b5..ae911d8 100644
--- a/src/plugins/crypto/munge/crypto_munge.c
+++ b/src/plugins/crypto/munge/crypto_munge.c
@@ -241,6 +241,7 @@
"slurmd mode.");
}
#else
+ rc = err;
goto end_it;
#endif
}
diff --git a/src/plugins/crypto/openssl/Makefile.in b/src/plugins/crypto/openssl/Makefile.in
index 225207f..4785ef0 100644
--- a/src/plugins/crypto/openssl/Makefile.in
+++ b/src/plugins/crypto/openssl/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -215,6 +221,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -251,6 +258,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -413,7 +421,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-crypto_openssl.la: $(crypto_openssl_la_OBJECTS) $(crypto_openssl_la_DEPENDENCIES)
+crypto_openssl.la: $(crypto_openssl_la_OBJECTS) $(crypto_openssl_la_DEPENDENCIES) $(EXTRA_crypto_openssl_la_DEPENDENCIES)
$(crypto_openssl_la_LINK) $(am_crypto_openssl_la_rpath) $(crypto_openssl_la_OBJECTS) $(crypto_openssl_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -550,10 +558,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/gres/Makefile.in b/src/plugins/gres/Makefile.in
index a3c9fd3..5c08756 100644
--- a/src/plugins/gres/Makefile.in
+++ b/src/plugins/gres/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/gres/gpu/Makefile.in b/src/plugins/gres/gpu/Makefile.in
index be5c09f..a2968ce 100644
--- a/src/plugins/gres/gpu/Makefile.in
+++ b/src/plugins/gres/gpu/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -406,7 +414,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-gres_gpu.la: $(gres_gpu_la_OBJECTS) $(gres_gpu_la_DEPENDENCIES)
+gres_gpu.la: $(gres_gpu_la_OBJECTS) $(gres_gpu_la_DEPENDENCIES) $(EXTRA_gres_gpu_la_DEPENDENCIES)
$(gres_gpu_la_LINK) -rpath $(pkglibdir) $(gres_gpu_la_OBJECTS) $(gres_gpu_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -543,10 +551,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/gres/gpu/gres_gpu.c b/src/plugins/gres/gpu/gres_gpu.c
index 3e60f18..dfe6a51 100644
--- a/src/plugins/gres/gpu/gres_gpu.c
+++ b/src/plugins/gres/gpu/gres_gpu.c
@@ -105,10 +105,13 @@
*/
const char plugin_name[] = "Gres GPU plugin";
const char plugin_type[] = "gres/gpu";
-const uint32_t plugin_version = 100;
+const uint32_t plugin_version = 110;
static char gres_name[] = "gpu";
+static int *gpu_devices = NULL;
+static int nb_available_files;
+
/*
* We could load gres state or validate it using various mechanisms here.
* This only validates that the configuration was specified in gres.conf.
@@ -116,23 +119,62 @@
*/
extern int node_config_load(List gres_conf_list)
{
- int rc = SLURM_ERROR;
+ int i, rc = SLURM_ERROR;
ListIterator iter;
gres_slurmd_conf_t *gres_slurmd_conf;
+ int nb_gpu = 0; /* Number of GPUs in the list */
+ int available_files_index = 0;
xassert(gres_conf_list);
iter = list_iterator_create(gres_conf_list);
if (iter == NULL)
fatal("list_iterator_create: malloc failure");
+
+ iter = list_iterator_create(gres_conf_list);
while ((gres_slurmd_conf = list_next(iter))) {
- if (strcmp(gres_slurmd_conf->name, gres_name) == 0) {
- rc = SLURM_SUCCESS;
+ if (strcmp(gres_slurmd_conf->name, gres_name))
+ continue;
+ rc = SLURM_SUCCESS;
+ if (gres_slurmd_conf->file)
+ nb_gpu++;
+ }
+ list_iterator_destroy(iter);
+ gpu_devices = NULL;
+ nb_available_files = -1;
+
+ /* (Re-)Allocate memory if number of files changed */
+ if (nb_gpu != nb_available_files) {
+ xfree(gpu_devices); /* No-op if NULL */
+ gpu_devices = (int *) xmalloc(sizeof(int) * nb_gpu);
+ nb_available_files = nb_gpu;
+ for (i = 0; i < nb_available_files; i++)
+ gpu_devices[i] = -1;
+ }
+
+ iter = list_iterator_create(gres_conf_list);
+ while ((gres_slurmd_conf = list_next(iter))) {
+ if ((strcmp(gres_slurmd_conf->name, gres_name) == 0) &&
+ gres_slurmd_conf->file) {
+ /* Populate gpu_devices array with number
+ * at end of the file name */
+ for (i = 0; gres_slurmd_conf->file[i]; i++) {
+ if (!isdigit(gres_slurmd_conf->file[i]))
+ continue;
+ gpu_devices[available_files_index] =
+ atoi(gres_slurmd_conf->file + i);
+ break;
+ }
+ available_files_index++;
}
}
list_iterator_destroy(iter);
if (rc != SLURM_SUCCESS)
fatal("%s failed to load configuration", plugin_name);
+
+ for (i = 0; i < nb_available_files; i++)
+ info("gpu %d is device number %d", i, gpu_devices[i]);
+
return rc;
}
@@ -158,7 +200,10 @@
dev_list = xmalloc(128);
else
xstrcat(dev_list, ",");
- xstrfmtcat(dev_list, "%d", i);
+ if (gpu_devices && (gpu_devices[i] >= 0))
+ xstrfmtcat(dev_list, "%d", gpu_devices[i]);
+ else
+ xstrfmtcat(dev_list, "%d", i);
}
}
if (dev_list) {
@@ -195,7 +240,10 @@
dev_list = xmalloc(128);
else
xstrcat(dev_list, ",");
- xstrfmtcat(dev_list, "%d", i);
+ if (gpu_devices && (gpu_devices[i] >= 0))
+ xstrfmtcat(dev_list, "%d", gpu_devices[i]);
+ else
+ xstrfmtcat(dev_list, "%d", i);
}
}
if (dev_list) {
@@ -209,3 +257,31 @@
"no device files configured");
}
}
+
+/* Send GRES information to slurmstepd on the specified file descriptor*/
+extern void send_stepd(int fd)
+{
+ int i;
+
+ safe_write(fd, &nb_available_files, sizeof(int));
+ for (i = 0; i < nb_available_files; i++)
+ safe_write(fd, &gpu_devices[i], sizeof(int));
+ return;
+
+rwfail: error("gres_plugin_send_stepd failed");
+}
+
+/* Receive GRES information from slurmd on the specified file descriptor*/
+extern void recv_stepd(int fd)
+{
+ int i;
+
+ safe_read(fd, &nb_available_files, sizeof(int));
+ if (nb_available_files > 0)
+ gpu_devices = xmalloc(sizeof(int) * nb_available_files);
+ for (i = 0; i < nb_available_files; i++)
+ safe_read(fd, &gpu_devices[i], sizeof(int));
+ return;
+
+rwfail: error("gres_plugin_recv_stepd failed");
+}
diff --git a/src/plugins/gres/nic/Makefile.in b/src/plugins/gres/nic/Makefile.in
index 4b9f185..a554517 100644
--- a/src/plugins/gres/nic/Makefile.in
+++ b/src/plugins/gres/nic/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -406,7 +414,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-gres_nic.la: $(gres_nic_la_OBJECTS) $(gres_nic_la_DEPENDENCIES)
+gres_nic.la: $(gres_nic_la_OBJECTS) $(gres_nic_la_DEPENDENCIES) $(EXTRA_gres_nic_la_DEPENDENCIES)
$(gres_nic_la_LINK) -rpath $(pkglibdir) $(gres_nic_la_OBJECTS) $(gres_nic_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -543,10 +551,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/gres/nic/gres_nic.c b/src/plugins/gres/nic/gres_nic.c
index b50af5d..ed93dea 100644
--- a/src/plugins/gres/nic/gres_nic.c
+++ b/src/plugins/gres/nic/gres_nic.c
@@ -103,7 +103,7 @@
*/
const char plugin_name[] = "Gres NIC plugin";
const char plugin_type[] = "gres/nic";
-const uint32_t plugin_version = 100;
+const uint32_t plugin_version = 110;
static char gres_name[] = "nic";
@@ -151,3 +151,15 @@
{
/* EMPTY */
}
+
+/* Send GRES information to slurmstepd on the specified file descriptor*/
+extern void send_stepd(int fd)
+{
+ /* EMPTY */
+}
+
+/* Receive GRES information from slurmd on the specified file descriptor*/
+extern void recv_stepd(int fd)
+{
+ /* EMPTY */
+}
diff --git a/src/plugins/job_submit/Makefile.am b/src/plugins/job_submit/Makefile.am
index d3e3e4f..f94ca08 100644
--- a/src/plugins/job_submit/Makefile.am
+++ b/src/plugins/job_submit/Makefile.am
@@ -1,7 +1,3 @@
# Makefile for job_submit plugins
-if HAVE_LUA
-LUA = lua
-endif
-
-SUBDIRS = cnode defaults logging $(LUA) partition
+SUBDIRS = cnode defaults logging lua partition
diff --git a/src/plugins/job_submit/Makefile.in b/src/plugins/job_submit/Makefile.in
index 3a53b1c..8d0ff54 100644
--- a/src/plugins/job_submit/Makefile.in
+++ b/src/plugins/job_submit/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -99,7 +99,7 @@
distdir
ETAGS = etags
CTAGS = ctags
-DIST_SUBDIRS = cnode defaults logging lua partition
+DIST_SUBDIRS = $(SUBDIRS)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -326,8 +328,7 @@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-@HAVE_LUA_TRUE@LUA = lua
-SUBDIRS = cnode defaults logging $(LUA) partition
+SUBDIRS = cnode defaults logging lua partition
all: all-recursive
.SUFFIXES:
@@ -576,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/job_submit/cnode/Makefile.in b/src/plugins/job_submit/cnode/Makefile.in
index 19170c7..e2062d2 100644
--- a/src/plugins/job_submit/cnode/Makefile.in
+++ b/src/plugins/job_submit/cnode/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
job_submit_cnode_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-job_submit_cnode.la: $(job_submit_cnode_la_OBJECTS) $(job_submit_cnode_la_DEPENDENCIES)
+job_submit_cnode.la: $(job_submit_cnode_la_OBJECTS) $(job_submit_cnode_la_DEPENDENCIES) $(EXTRA_job_submit_cnode_la_DEPENDENCIES)
$(job_submit_cnode_la_LINK) -rpath $(pkglibdir) $(job_submit_cnode_la_OBJECTS) $(job_submit_cnode_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/job_submit/defaults/Makefile.in b/src/plugins/job_submit/defaults/Makefile.in
index d3482d2..af85cab 100644
--- a/src/plugins/job_submit/defaults/Makefile.in
+++ b/src/plugins/job_submit/defaults/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
job_submit_defaults_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-job_submit_defaults.la: $(job_submit_defaults_la_OBJECTS) $(job_submit_defaults_la_DEPENDENCIES)
+job_submit_defaults.la: $(job_submit_defaults_la_OBJECTS) $(job_submit_defaults_la_DEPENDENCIES) $(EXTRA_job_submit_defaults_la_DEPENDENCIES)
$(job_submit_defaults_la_LINK) -rpath $(pkglibdir) $(job_submit_defaults_la_OBJECTS) $(job_submit_defaults_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/job_submit/logging/Makefile.in b/src/plugins/job_submit/logging/Makefile.in
index d1669f8..38a1484 100644
--- a/src/plugins/job_submit/logging/Makefile.in
+++ b/src/plugins/job_submit/logging/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
job_submit_logging_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-job_submit_logging.la: $(job_submit_logging_la_OBJECTS) $(job_submit_logging_la_DEPENDENCIES)
+job_submit_logging.la: $(job_submit_logging_la_OBJECTS) $(job_submit_logging_la_DEPENDENCIES) $(EXTRA_job_submit_logging_la_DEPENDENCIES)
$(job_submit_logging_la_LINK) -rpath $(pkglibdir) $(job_submit_logging_la_OBJECTS) $(job_submit_logging_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/job_submit/lua/Makefile.am b/src/plugins/job_submit/lua/Makefile.am
index 6c3cf55..dd7f354 100644
--- a/src/plugins/job_submit/lua/Makefile.am
+++ b/src/plugins/job_submit/lua/Makefile.am
@@ -7,8 +7,13 @@
PLUGIN_FLAGS = -module -avoid-version --export-dynamic
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common $(lua_CFLAGS)
-pkglib_LTLIBRARIES = job_submit_lua.la
-
+if HAVE_LUA
+
# Job submit lua plugin.
+pkglib_LTLIBRARIES = job_submit_lua.la
job_submit_lua_la_SOURCES = job_submit_lua.c
job_submit_lua_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(lua_LIBS)
+
+else
+EXTRA_job_submit_lua_la_SOURCES = job_submit_lua.c
+endif
diff --git a/src/plugins/job_submit/lua/Makefile.in b/src/plugins/job_submit/lua/Makefile.in
index 83c4e14..83778cd 100644
--- a/src/plugins/job_submit/lua/Makefile.in
+++ b/src/plugins/job_submit/lua/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,14 +105,23 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
job_submit_lua_la_LIBADD =
-am_job_submit_lua_la_OBJECTS = job_submit_lua.lo
+am__job_submit_lua_la_SOURCES_DIST = job_submit_lua.c
+@HAVE_LUA_TRUE@am_job_submit_lua_la_OBJECTS = job_submit_lua.lo
+am__EXTRA_job_submit_lua_la_SOURCES_DIST = job_submit_lua.c
job_submit_lua_la_OBJECTS = $(am_job_submit_lua_la_OBJECTS)
job_submit_lua_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(job_submit_lua_la_LDFLAGS) $(LDFLAGS) -o $@
+@HAVE_LUA_TRUE@am_job_submit_lua_la_rpath = -rpath $(pkglibdir)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm
depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp
am__depfiles_maybe = depfiles
@@ -126,8 +135,10 @@
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
-SOURCES = $(job_submit_lua_la_SOURCES)
-DIST_SOURCES = $(job_submit_lua_la_SOURCES)
+SOURCES = $(job_submit_lua_la_SOURCES) \
+ $(EXTRA_job_submit_lua_la_SOURCES)
+DIST_SOURCES = $(am__job_submit_lua_la_SOURCES_DIST) \
+ $(am__EXTRA_job_submit_lua_la_SOURCES_DIST)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -208,6 +219,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +256,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -335,11 +348,12 @@
AM_CPPFLAGS = -DDEFAULT_SCRIPT_DIR=\"$(sysconfdir)\"
PLUGIN_FLAGS = -module -avoid-version --export-dynamic
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common $(lua_CFLAGS)
-pkglib_LTLIBRARIES = job_submit_lua.la
# Job submit lua plugin.
-job_submit_lua_la_SOURCES = job_submit_lua.c
-job_submit_lua_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(lua_LIBS)
+@HAVE_LUA_TRUE@pkglib_LTLIBRARIES = job_submit_lua.la
+@HAVE_LUA_TRUE@job_submit_lua_la_SOURCES = job_submit_lua.c
+@HAVE_LUA_TRUE@job_submit_lua_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(lua_LIBS)
+@HAVE_LUA_FALSE@EXTRA_job_submit_lua_la_SOURCES = job_submit_lua.c
all: all-am
.SUFFIXES:
@@ -405,8 +419,8 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-job_submit_lua.la: $(job_submit_lua_la_OBJECTS) $(job_submit_lua_la_DEPENDENCIES)
- $(job_submit_lua_la_LINK) -rpath $(pkglibdir) $(job_submit_lua_la_OBJECTS) $(job_submit_lua_la_LIBADD) $(LIBS)
+job_submit_lua.la: $(job_submit_lua_la_OBJECTS) $(job_submit_lua_la_DEPENDENCIES) $(EXTRA_job_submit_lua_la_DEPENDENCIES)
+ $(job_submit_lua_la_LINK) $(am_job_submit_lua_la_rpath) $(job_submit_lua_la_OBJECTS) $(job_submit_lua_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -542,10 +556,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/job_submit/lua/job_submit_lua.c b/src/plugins/job_submit/lua/job_submit_lua.c
index a89c76d..46fa6fd 100644
--- a/src/plugins/job_submit/lua/job_submit_lua.c
+++ b/src/plugins/job_submit/lua/job_submit_lua.c
@@ -777,8 +777,9 @@
_push_job_desc(job_desc);
_push_partition_list(job_desc->user_id, submit_uid);
+ lua_pushnumber (L, submit_uid);
_stack_dump("job_submit, before lua_pcall", L);
- if (lua_pcall (L, 2, 1, 0) != 0) {
+ if (lua_pcall (L, 3, 1, 0) != 0) {
error("%s/lua: %s: %s",
__func__, lua_script_path, lua_tostring (L, -1));
} else {
@@ -815,8 +816,9 @@
_push_job_desc(job_desc);
_push_job_rec(job_ptr);
_push_partition_list(job_ptr->user_id, submit_uid);
+ lua_pushnumber (L, submit_uid);
_stack_dump("job_modify, before lua_pcall", L);
- if (lua_pcall (L, 3, 1, 0) != 0) {
+ if (lua_pcall (L, 4, 1, 0) != 0) {
error("%s/lua: %s: %s",
__func__, lua_script_path, lua_tostring (L, -1));
} else {
diff --git a/src/plugins/job_submit/partition/Makefile.in b/src/plugins/job_submit/partition/Makefile.in
index 6d0bb65..fb25f40 100644
--- a/src/plugins/job_submit/partition/Makefile.in
+++ b/src/plugins/job_submit/partition/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
job_submit_partition_la_LIBADD =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -405,7 +413,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-job_submit_partition.la: $(job_submit_partition_la_OBJECTS) $(job_submit_partition_la_DEPENDENCIES)
+job_submit_partition.la: $(job_submit_partition_la_OBJECTS) $(job_submit_partition_la_DEPENDENCIES) $(EXTRA_job_submit_partition_la_DEPENDENCIES)
$(job_submit_partition_la_LINK) -rpath $(pkglibdir) $(job_submit_partition_la_OBJECTS) $(job_submit_partition_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -542,10 +550,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobacct_gather/Makefile.am b/src/plugins/jobacct_gather/Makefile.am
index 1039e1a..21525a1 100644
--- a/src/plugins/jobacct_gather/Makefile.am
+++ b/src/plugins/jobacct_gather/Makefile.am
@@ -1,3 +1,3 @@
# Makefile for jobacct plugins
-SUBDIRS = linux aix none
+SUBDIRS = linux aix cgroup none
diff --git a/src/plugins/jobacct_gather/Makefile.in b/src/plugins/jobacct_gather/Makefile.in
index 0a64971..a9d8030 100644
--- a/src/plugins/jobacct_gather/Makefile.in
+++ b/src/plugins/jobacct_gather/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -326,7 +328,7 @@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-SUBDIRS = linux aix none
+SUBDIRS = linux aix cgroup none
all: all-recursive
.SUFFIXES:
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobacct_gather/aix/Makefile.in b/src/plugins/jobacct_gather/aix/Makefile.in
index 018de83..8d672f9 100644
--- a/src/plugins/jobacct_gather/aix/Makefile.in
+++ b/src/plugins/jobacct_gather/aix/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
jobacct_gather_aix_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-jobacct_gather_aix.la: $(jobacct_gather_aix_la_OBJECTS) $(jobacct_gather_aix_la_DEPENDENCIES)
+jobacct_gather_aix.la: $(jobacct_gather_aix_la_OBJECTS) $(jobacct_gather_aix_la_DEPENDENCIES) $(EXTRA_jobacct_gather_aix_la_DEPENDENCIES)
$(jobacct_gather_aix_la_LINK) -rpath $(pkglibdir) $(jobacct_gather_aix_la_OBJECTS) $(jobacct_gather_aix_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobacct_gather/cgroup/Makefile.am b/src/plugins/jobacct_gather/cgroup/Makefile.am
new file mode 100644
index 0000000..5f9a70c
--- /dev/null
+++ b/src/plugins/jobacct_gather/cgroup/Makefile.am
@@ -0,0 +1,18 @@
+# Makefile for jobacct_gather/cgroup plugin
+
+AUTOMAKE_OPTIONS = foreign
+
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+
+pkglib_LTLIBRARIES = jobacct_gather_cgroup.la
+
+# Null job completion logging plugin.
+jobacct_gather_cgroup_la_SOURCES = jobacct_gather_cgroup.c \
+ jobacct_gather_cgroup_cpuacct.c \
+ jobacct_gather_cgroup_memory.c \
+ jobacct_gather_cgroup.h
+
+jobacct_gather_cgroup_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+
diff --git a/src/plugins/jobacct_gather/cgroup/Makefile.in b/src/plugins/jobacct_gather/cgroup/Makefile.in
new file mode 100644
index 0000000..a30b88c
--- /dev/null
+++ b/src/plugins/jobacct_gather/cgroup/Makefile.in
@@ -0,0 +1,671 @@
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Makefile for jobacct_gather/cgroup plugin
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = src/plugins/jobacct_gather/cgroup
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
+ $(top_srcdir)/auxdir/libtool.m4 \
+ $(top_srcdir)/auxdir/ltoptions.m4 \
+ $(top_srcdir)/auxdir/ltsugar.m4 \
+ $(top_srcdir)/auxdir/ltversion.m4 \
+ $(top_srcdir)/auxdir/lt~obsolete.m4 \
+ $(top_srcdir)/auxdir/slurm.m4 \
+ $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \
+ $(top_srcdir)/auxdir/x_ac_affinity.m4 \
+ $(top_srcdir)/auxdir/x_ac_aix.m4 \
+ $(top_srcdir)/auxdir/x_ac_blcr.m4 \
+ $(top_srcdir)/auxdir/x_ac_bluegene.m4 \
+ $(top_srcdir)/auxdir/x_ac_cflags.m4 \
+ $(top_srcdir)/auxdir/x_ac_cray.m4 \
+ $(top_srcdir)/auxdir/x_ac_databases.m4 \
+ $(top_srcdir)/auxdir/x_ac_debug.m4 \
+ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \
+ $(top_srcdir)/auxdir/x_ac_elan.m4 \
+ $(top_srcdir)/auxdir/x_ac_env.m4 \
+ $(top_srcdir)/auxdir/x_ac_federation.m4 \
+ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \
+ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \
+ $(top_srcdir)/auxdir/x_ac_iso.m4 \
+ $(top_srcdir)/auxdir/x_ac_lua.m4 \
+ $(top_srcdir)/auxdir/x_ac_man2html.m4 \
+ $(top_srcdir)/auxdir/x_ac_munge.m4 \
+ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \
+ $(top_srcdir)/auxdir/x_ac_pam.m4 \
+ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \
+ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \
+ $(top_srcdir)/auxdir/x_ac_readline.m4 \
+ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \
+ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
+ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \
+ $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \
+ $(top_srcdir)/auxdir/x_ac_srun.m4 \
+ $(top_srcdir)/auxdir/x_ac_sun_const.m4 \
+ $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(pkglibdir)"
+LTLIBRARIES = $(pkglib_LTLIBRARIES)
+jobacct_gather_cgroup_la_LIBADD =
+am_jobacct_gather_cgroup_la_OBJECTS = jobacct_gather_cgroup.lo \
+ jobacct_gather_cgroup_cpuacct.lo \
+ jobacct_gather_cgroup_memory.lo
+jobacct_gather_cgroup_la_OBJECTS = \
+ $(am_jobacct_gather_cgroup_la_OBJECTS)
+jobacct_gather_cgroup_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(jobacct_gather_cgroup_la_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm
+depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(jobacct_gather_cgroup_la_SOURCES)
+DIST_SOURCES = $(jobacct_gather_cgroup_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTHD_CFLAGS = @AUTHD_CFLAGS@
+AUTHD_LIBS = @AUTHD_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BGL_LOADED = @BGL_LOADED@
+BGQ_LOADED = @BGQ_LOADED@
+BG_INCLUDES = @BG_INCLUDES@
+BG_LDFLAGS = @BG_LDFLAGS@
+BG_L_P_LOADED = @BG_L_P_LOADED@
+BLCR_CPPFLAGS = @BLCR_CPPFLAGS@
+BLCR_HOME = @BLCR_HOME@
+BLCR_LDFLAGS = @BLCR_LDFLAGS@
+BLCR_LIBS = @BLCR_LIBS@
+BLUEGENE_LOADED = @BLUEGENE_LOADED@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CMD_LDFLAGS = @CMD_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DL_LIBS = @DL_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ELAN_LIBS = @ELAN_LIBS@
+EXEEXT = @EXEEXT@
+FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@
+FGREP = @FGREP@
+GREP = @GREP@
+GTK_CFLAGS = @GTK_CFLAGS@
+GTK_LIBS = @GTK_LIBS@
+HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@
+HAVEPGCONFIG = @HAVEPGCONFIG@
+HAVE_AIX = @HAVE_AIX@
+HAVE_ELAN = @HAVE_ELAN@
+HAVE_FEDERATION = @HAVE_FEDERATION@
+HAVE_MAN2HTML = @HAVE_MAN2HTML@
+HAVE_OPENSSL = @HAVE_OPENSSL@
+HAVE_SOME_CURSES = @HAVE_SOME_CURSES@
+HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@
+HWLOC_LDFLAGS = @HWLOC_LDFLAGS@
+HWLOC_LIBS = @HWLOC_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_LDFLAGS = @LIB_LDFLAGS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
+MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
+MUNGE_LIBS = @MUNGE_LIBS@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_LIBS = @MYSQL_LIBS@
+NCURSES = @NCURSES@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NUMA_LIBS = @NUMA_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PAM_DIR = @PAM_DIR@
+PAM_LIBS = @PAM_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PROCTRACKDIR = @PROCTRACKDIR@
+PROJECT = @PROJECT@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+READLINE_LIBS = @READLINE_LIBS@
+REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
+RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
+SED = @SED@
+SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SLURMCTLD_PORT = @SLURMCTLD_PORT@
+SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@
+SLURMDBD_PORT = @SLURMDBD_PORT@
+SLURMD_PORT = @SLURMD_PORT@
+SLURM_API_AGE = @SLURM_API_AGE@
+SLURM_API_CURRENT = @SLURM_API_CURRENT@
+SLURM_API_MAJOR = @SLURM_API_MAJOR@
+SLURM_API_REVISION = @SLURM_API_REVISION@
+SLURM_API_VERSION = @SLURM_API_VERSION@
+SLURM_MAJOR = @SLURM_MAJOR@
+SLURM_MICRO = @SLURM_MICRO@
+SLURM_MINOR = @SLURM_MINOR@
+SLURM_PREFIX = @SLURM_PREFIX@
+SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@
+SLURM_VERSION_STRING = @SLURM_VERSION_STRING@
+SO_LDFLAGS = @SO_LDFLAGS@
+SSL_CPPFLAGS = @SSL_CPPFLAGS@
+SSL_LDFLAGS = @SSL_LDFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+UTIL_LIBS = @UTIL_LIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+ac_have_man2html = @ac_have_man2html@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lua_CFLAGS = @lua_CFLAGS@
+lua_LIBS = @lua_LIBS@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AUTOMAKE_OPTIONS = foreign
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+pkglib_LTLIBRARIES = jobacct_gather_cgroup.la
+
+# Null job completion logging plugin.
+jobacct_gather_cgroup_la_SOURCES = jobacct_gather_cgroup.c \
+ jobacct_gather_cgroup_cpuacct.c \
+ jobacct_gather_cgroup_memory.c \
+ jobacct_gather_cgroup.h
+
+jobacct_gather_cgroup_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/jobacct_gather/cgroup/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/plugins/jobacct_gather/cgroup/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(pkglibdir)" || $(MKDIR_P) "$(DESTDIR)$(pkglibdir)"
+ @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \
+ }
+
+uninstall-pkglibLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \
+ done
+
+clean-pkglibLTLIBRARIES:
+ -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES)
+ @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+jobacct_gather_cgroup.la: $(jobacct_gather_cgroup_la_OBJECTS) $(jobacct_gather_cgroup_la_DEPENDENCIES) $(EXTRA_jobacct_gather_cgroup_la_DEPENDENCIES)
+ $(jobacct_gather_cgroup_la_LINK) -rpath $(pkglibdir) $(jobacct_gather_cgroup_la_OBJECTS) $(jobacct_gather_cgroup_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/jobacct_gather_cgroup.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/jobacct_gather_cgroup_cpuacct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/jobacct_gather_cgroup_memory.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(pkglibdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-pkglibLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-pkglibLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-pkglibLTLIBRARIES ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-pkglibLTLIBRARIES \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c
new file mode 100644
index 0000000..9d7aed9
--- /dev/null
+++ b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c
@@ -0,0 +1,922 @@
+/*****************************************************************************\
+ * jobacct_gather_cgroup.c - slurm job accounting gather plugin for cgroup.
+ *****************************************************************************
+ * Copyright (C) 2011 Bull.
+ * Written by Martin Perry, <martin.perry@bull.com>, who borrowed heavily
+ * from other parts of SLURM
+ * CODE-OCEC-09-009. All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * This file is patterned after jobcomp_linux.c, written by Morris Jette and
+ * Copyright (C) 2002 The Regents of the University of California.
+\*****************************************************************************/
+
+#include <fcntl.h>
+#include <signal.h>
+#include "src/common/slurm_xlator.h"
+#include "src/common/slurm_protocol_api.h"
+#include "src/common/slurm_protocol_defs.h"
+#include "src/slurmd/slurmd/slurmd.h"
+#include "src/common/xstring.h"
+#include "src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h"
+#include "src/slurmd/common/proctrack.h"
+
+#define _DEBUG 0
+
+/* These are defined here so when we link with something other than
+ * the slurmd we will have these symbols defined. They will get
+ * overwritten when linking with the slurmd.
+ */
+#if defined (__APPLE__)
+uint32_t jobacct_job_id __attribute__((weak_import));
+pthread_mutex_t jobacct_lock __attribute__((weak_import));
+uint32_t jobacct_mem_limit __attribute__((weak_import));
+uint32_t jobacct_step_id __attribute__((weak_import));
+uint32_t jobacct_vmem_limit __attribute__((weak_import));
+slurmd_conf_t *conf __attribute__((weak_import));
+int bg_recover __attribute__((weak_import)) = NOT_FROM_CONTROLLER;
+#else
+uint32_t jobacct_job_id;
+pthread_mutex_t jobacct_lock;
+uint32_t jobacct_mem_limit;
+uint32_t jobacct_step_id;
+uint32_t jobacct_vmem_limit;
+slurmd_conf_t *conf;
+int bg_recover = NOT_FROM_CONTROLLER;
+#endif
+
+
+/*
+ * These variables are required by the generic plugin interface. If they
+ * are not found in the plugin, the plugin loader will ignore it.
+ *
+ * plugin_name - a string giving a human-readable description of the
+ * plugin. There is no maximum length, but the symbol must refer to
+ * a valid string.
+ *
+ * plugin_type - a string suggesting the type of the plugin or its
+ * applicability to a particular form of data or method of data handling.
+ * If the low-level plugin API is used, the contents of this string are
+ * unimportant and may be anything. SLURM uses the higher-level plugin
+ * interface which requires this string to be of the form
+ *
+ * <application>/<method>
+ *
+ * where <application> is a description of the intended application of
+ * the plugin (e.g., "jobacct" for SLURM job completion logging) and <method>
+ * is a description of how this plugin satisfies that application. SLURM will
+ * only load job completion logging plugins if the plugin_type string has a
+ * prefix of "jobacct/".
+ *
+ * plugin_version - an unsigned 32-bit integer giving the version number
+ * of the plugin. If major and minor revisions are desired, the major
+ * version number may be multiplied by a suitable magnitude constant such
+ * as 100 or 1000. Various SLURM versions will likely require a certain
+ * minimum version for their plugins as the job accounting API
+ * matures.
+ */
+const char plugin_name[] = "Job accounting gather cgroup plugin";
+const char plugin_type[] = "jobacct_gather/cgroup";
+const uint32_t plugin_version = 100;
+
+/* Other useful declarations */
+
+typedef struct prec { /* process record */
+ pid_t pid;
+ pid_t ppid;
+ int usec; /* user cpu time */
+ int ssec; /* system cpu time */
+ int pages; /* pages */
+ int rss; /* rss */
+ int vsize; /* virtual size */
+ int cpu_cycles; /* cpu cycles */
+// int last_cpu;
+} prec_t;
+
+static int freq = 0;
+static DIR *slash_proc = NULL;
+static pthread_mutex_t reading_mutex = PTHREAD_MUTEX_INITIALIZER;
+static bool jobacct_shutdown = 0;
+static bool jobacct_suspended = 0;
+static List task_list = NULL;
+static uint64_t cont_id = (uint64_t)NO_VAL;
+static bool pgid_plugin = false;
+static slurm_cgroup_conf_t slurm_cgroup_conf;
+
+/* Finally, pre-define all local routines. */
+
+static void _acct_kill_step(void);
+static void _destroy_prec(void *object);
+static int _is_a_lwp(uint32_t pid);
+static void _get_process_data(void);
+static int _get_process_data_line(int in, prec_t *prec);
+static void *_watch_tasks(void *arg);
+
+/*
+ * _get_process_data() - Build a table of all current processes
+ *
+ * IN: pid.
+ *
+ * OUT: none
+ *
+ * THREADSAFE! Only one thread ever gets here.
+ *
+ * Assumption:
+ * Any file with a name of the form "/proc/[0-9]+/stat"
+ * is a Linux-style stat entry. We disregard the data if they look
+ * wrong.
+ */
+static void _get_process_data(void)
+{
+ static int slash_proc_open = 0;
+
+ struct dirent *slash_proc_entry;
+ char *iptr = NULL, *optr = NULL;
+ FILE *stat_fp = NULL;
+ char proc_stat_file[256]; /* Allow ~20x extra length */
+ List prec_list = NULL;
+ pid_t *pids = NULL;
+ int npids = 0;
+ uint32_t total_job_mem = 0, total_job_vsize = 0;
+ int i, fd;
+ ListIterator itr;
+ ListIterator itr2;
+ prec_t *prec = NULL;
+ struct jobacctinfo *jobacct = NULL;
+ static int processing = 0;
+ long hertz;
+ char *cpu_time, *memory_stat, *ptr;
+ size_t cpu_time_size, memory_stat_size;
+ int utime, stime, total_rss, total_pgpgin, page_size;
+
+ page_size = getpagesize();
+ if (!pgid_plugin && cont_id == (uint64_t)NO_VAL) {
+ debug("cont_id hasn't been set yet not running poll");
+ return;
+ }
+
+ if (processing) {
+ debug("already running, returning");
+ return;
+ }
+ processing = 1;
+ prec_list = list_create(_destroy_prec);
+
+ hertz = sysconf(_SC_CLK_TCK);
+ if (hertz < 1) {
+ error ("_get_process_data: unable to get clock rate");
+ hertz = 100; /* default on many systems */
+ }
+
+ if (!pgid_plugin) {
+ /* get only the processes in the proctrack container */
+ slurm_container_get_pids(cont_id, &pids, &npids);
+ if (!npids) {
+ debug4("no pids in this container %"PRIu64"", cont_id);
+ goto finished;
+ }
+ for (i = 0; i < npids; i++) {
+ snprintf(proc_stat_file, 256, "/proc/%d/stat", pids[i]);
+ if ((stat_fp = fopen(proc_stat_file, "r"))==NULL)
+ continue; /* Assume the process went away */
+ /*
+ * Close the file on exec() of user tasks.
+ *
+ * NOTE: If we fork() slurmstepd after the
+ * fopen() above and before the fcntl() below,
+ * then the user task may have this extra file
+ * open, which can cause problems for
+ * checkpoint/restart, but this should be a very rare
+ * problem in practice.
+ */
+ fd = fileno(stat_fp);
+ fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+ prec = xmalloc(sizeof(prec_t));
+ if (_get_process_data_line(fd, prec)) {
+ xcgroup_get_param(&task_cpuacct_cg,
+ "cpuacct.stat",
+ &cpu_time, &cpu_time_size);
+ sscanf(cpu_time, "%*s %d %*s %d", &utime,
+ &stime);
+ prec->usec = utime;
+ prec->ssec = stime;
+ prec->vsize = 0;
+ xcgroup_get_param(&task_memory_cg,
+ "memory.stat",
+ &memory_stat,
+ &memory_stat_size);
+ ptr = strstr(memory_stat, "total_rss");
+ sscanf(ptr, "total_rss %u", &total_rss);
+ ptr = strstr(memory_stat, "total_pgpgin");
+ sscanf(ptr, "total_pgpgin %u", &total_pgpgin);
+ prec->pages = total_pgpgin;
+ prec->rss = total_rss / page_size;
+ list_append(prec_list, prec);
+ } else
+ xfree(prec);
+ fclose(stat_fp);
+ }
+ } else {
+ slurm_mutex_lock(&reading_mutex);
+
+ if (slash_proc_open) {
+ rewinddir(slash_proc);
+ } else {
+ slash_proc=opendir("/proc");
+ if (slash_proc == NULL) {
+ perror("opening /proc");
+ slurm_mutex_unlock(&reading_mutex);
+ goto finished;
+ }
+ slash_proc_open=1;
+ }
+ strcpy(proc_stat_file, "/proc/");
+
+ while ((slash_proc_entry = readdir(slash_proc))) {
+
+ /* Save a few cyles by simulating
+ strcat(statFileName, slash_proc_entry->d_name);
+ strcat(statFileName, "/stat");
+ while checking for a numeric filename (which really
+ should be a pid).
+ */
+ optr = proc_stat_file + sizeof("/proc");
+ iptr = slash_proc_entry->d_name;
+ i = 0;
+ do {
+ if ((*iptr < '0')
+ || ((*optr++ = *iptr++) > '9')) {
+ i = -1;
+ break;
+ }
+ } while (*iptr);
+
+ if (i == -1)
+ continue;
+ iptr = (char*)"/stat";
+
+ do {
+ *optr++ = *iptr++;
+ } while (*iptr);
+ *optr = 0;
+
+ if ((stat_fp = fopen(proc_stat_file,"r"))==NULL)
+ continue; /* Assume the process went away */
+ /*
+ * Close the file on exec() of user tasks.
+ *
+ * NOTE: If we fork() slurmstepd after the
+ * fopen() above and before the fcntl() below,
+ * then the user task may have this extra file
+ * open, which can cause problems for
+ * checkpoint/restart, but this should be a very rare
+ * problem in practice.
+ */
+ fd = fileno(stat_fp);
+ fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+ prec = xmalloc(sizeof(prec_t));
+ if (_get_process_data_line(fd, prec)) {
+ xcgroup_get_param(&task_cpuacct_cg,
+ "cpuacct.stat",
+ &cpu_time, &cpu_time_size);
+ sscanf(cpu_time, "%*s %d %*s %d", &utime,
+ &stime);
+ prec->usec = utime;
+ prec->ssec = stime;
+ prec->vsize = 0;
+ xcgroup_get_param(&task_memory_cg,"memory.stat",
+ &memory_stat,
+ &memory_stat_size);
+ ptr = strstr(memory_stat, "total_rss");
+ sscanf(ptr, "total_rss %u", &total_rss);
+ ptr = strstr(memory_stat, "total_pgpgin");
+ sscanf(ptr, "total_pgpgin %u", &total_pgpgin);
+ prec->pages = total_pgpgin;
+ prec->rss = total_rss / page_size;
+ list_append(prec_list, prec);
+ }
+ else
+ xfree(prec);
+ fclose(stat_fp);
+ }
+ slurm_mutex_unlock(&reading_mutex);
+
+ }
+
+ if (!list_count(prec_list)) {
+ goto finished; /* We have no business being here! */
+ }
+
+ slurm_mutex_lock(&jobacct_lock);
+ if (!task_list || !list_count(task_list)) {
+ slurm_mutex_unlock(&jobacct_lock);
+ goto finished;
+ }
+
+ itr = list_iterator_create(task_list);
+ while ((jobacct = list_next(itr))) {
+ itr2 = list_iterator_create(prec_list);
+ while ((prec = list_next(itr2))) {
+ if (prec->pid == jobacct->pid) {
+#if _DEBUG
+ info("pid:%u ppid:%u rss:%d KB",
+ prec->pid, prec->ppid, prec->rss);
+#endif
+ /* tally their usage */
+ jobacct->max_rss = jobacct->tot_rss =
+ MAX(jobacct->max_rss, prec->rss);
+ total_job_mem += prec->rss;
+ jobacct->max_vsize = jobacct->tot_vsize =
+ MAX(jobacct->max_vsize, prec->vsize);
+ total_job_vsize += prec->vsize;
+ jobacct->max_pages = jobacct->tot_pages =
+ MAX(jobacct->max_pages, prec->pages);
+ jobacct->min_cpu = jobacct->tot_cpu =
+ MAX(jobacct->min_cpu,
+ (prec->ssec / hertz +
+ prec->usec / hertz));
+ debug2("%d mem size %u %u time %u(%u+%u) ",
+ jobacct->pid, jobacct->max_rss,
+ jobacct->max_vsize, jobacct->tot_cpu,
+ prec->usec, prec->ssec);
+ break;
+ }
+ }
+ list_iterator_destroy(itr2);
+ }
+ list_iterator_destroy(itr);
+ slurm_mutex_unlock(&jobacct_lock);
+
+ if (jobacct_mem_limit) {
+ if (jobacct_step_id == NO_VAL) {
+ debug("Job %u memory used:%u limit:%u KB",
+ jobacct_job_id, total_job_mem, jobacct_mem_limit);
+ } else {
+ debug("Step %u.%u memory used:%u limit:%u KB",
+ jobacct_job_id, jobacct_step_id,
+ total_job_mem, jobacct_mem_limit);
+ }
+ }
+ if (jobacct_job_id && jobacct_mem_limit &&
+ (total_job_mem > jobacct_mem_limit)) {
+ if (jobacct_step_id == NO_VAL) {
+ error("Job %u exceeded %u KB memory limit, being "
+ "killed", jobacct_job_id, jobacct_mem_limit);
+ } else {
+ error("Step %u.%u exceeded %u KB memory limit, being "
+ "killed", jobacct_job_id, jobacct_step_id,
+ jobacct_mem_limit);
+ }
+ _acct_kill_step();
+ } else if (jobacct_job_id && jobacct_vmem_limit &&
+ (total_job_vsize > jobacct_vmem_limit)) {
+ if (jobacct_step_id == NO_VAL) {
+ error("Job %u exceeded %u KB virtual memory limit, "
+ "being killed", jobacct_job_id,
+ jobacct_vmem_limit);
+ } else {
+ error("Step %u.%u exceeded %u KB virtual memory "
+ "limit, being killed", jobacct_job_id,
+ jobacct_step_id, jobacct_vmem_limit);
+ }
+ _acct_kill_step();
+ }
+
+finished:
+ list_destroy(prec_list);
+ processing = 0;
+ return;
+}
+
+/* _acct_kill_step() issue RPC to kill a slurm job step */
+static void _acct_kill_step(void)
+{
+ slurm_msg_t msg;
+ job_step_kill_msg_t req;
+ job_notify_msg_t notify_req;
+
+ slurm_msg_t_init(&msg);
+ notify_req.job_id = jobacct_job_id;
+ notify_req.job_step_id = jobacct_step_id;
+ notify_req.message = "Exceeded job memory limit";
+ msg.msg_type = REQUEST_JOB_NOTIFY;
+ msg.data = ¬ify_req;
+ slurm_send_only_controller_msg(&msg);
+
+ /*
+ * Request message:
+ */
+ req.job_id = jobacct_job_id;
+ req.job_step_id = jobacct_step_id;
+ req.signal = SIGKILL;
+ req.batch_flag = 0;
+ msg.msg_type = REQUEST_CANCEL_JOB_STEP;
+ msg.data = &req;
+
+ slurm_send_only_controller_msg(&msg);
+}
+
+static int _is_a_lwp(uint32_t pid) {
+
+ FILE *status_fp = NULL;
+ char proc_status_file[256];
+ uint32_t tgid;
+ int rc;
+
+ if (snprintf(proc_status_file, 256, "/proc/%d/status", pid) > 256) {
+ debug("jobacct_gather_cgroup: unable to build proc_status "
+ "fpath");
+ return -1;
+ }
+ if (!(status_fp = fopen(proc_status_file, "r"))) {
+ debug3("jobacct_gather_cgroup: unable to open %s",
+ proc_status_file);
+ return -1;
+ }
+
+
+ do {
+ rc = fscanf(status_fp,
+ "Name:\t%*s\n%*[ \ta-zA-Z0-9:()]\nTgid:\t%d\n",
+ &tgid);
+ } while (rc < 0 && errno == EINTR);
+ fclose(status_fp);
+
+ /* unable to read /proc/[pid]/status content */
+ if (rc != 1) {
+ debug3("jobacct_gather_cgroup: unable to read requested "
+ "pattern in %s",proc_status_file);
+ return -1;
+ }
+
+ /* if tgid differs from pid, this is a LWP (Thread POSIX) */
+ if ((uint32_t) tgid != (uint32_t) pid) {
+ debug3("jobacct_gather_cgroup: pid=%d is a lightweight process",
+ tgid);
+ return 1;
+ } else
+ return 0;
+}
+
+/* _get_process_data_line() - get line of data from /proc/<pid>/stat
+ *
+ * IN: in - input file descriptor
+ * OUT: prec - the destination for the data
+ *
+ * RETVAL: ==0 - no valid data
+ * !=0 - data are valid
+ *
+ * Based upon stat2proc() from the ps command. It can handle arbitrary
+ * executable file basenames for `cmd', i.e. those with embedded
+ * whitespace or embedded ')'s. Such names confuse %s (see scanf(3)),
+ * so the string is split and %39c is used instead.
+ * (except for embedded ')' "(%[^)]c)" would work.
+ */
+static int _get_process_data_line(int in, prec_t *prec) {
+ char sbuf[256], *tmp;
+ int num_read, nvals;
+ char cmd[40], state[1];
+ int ppid, pgrp, session, tty_nr, tpgid;
+ long unsigned flags, minflt, cminflt, majflt, cmajflt;
+ long unsigned utime, stime, starttime, vsize;
+ long int cutime, cstime, priority, nice, timeout, itrealvalue, rss;
+ long int f1,f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12;
+ long int f13,f14, lastcpu;
+
+ num_read = read(in, sbuf, (sizeof(sbuf) - 1));
+ if (num_read <= 0)
+ return 0;
+ sbuf[num_read] = '\0';
+
+ tmp = strrchr(sbuf, ')'); /* split into "PID (cmd" and "<rest>" */
+ *tmp = '\0'; /* replace trailing ')' with NUL */
+ /* parse these two strings separately, skipping the leading "(". */
+ nvals = sscanf(sbuf, "%d (%39c", &prec->pid, cmd);
+ if (nvals < 2)
+ return 0;
+
+ nvals = sscanf(tmp + 2, /* skip space after ')' too */
+ "%c %d %d %d %d %d "
+ "%lu %lu %lu %lu %lu "
+ "%lu %lu %ld %ld %ld %ld "
+ "%ld %ld %lu %lu %ld "
+ "%lu %lu %lu %lu %lu "
+ "%lu %lu %lu %lu %lu "
+ "%lu %lu %lu %ld %ld ",
+ state, &ppid, &pgrp, &session, &tty_nr, &tpgid,
+ &flags, &minflt, &cminflt, &majflt, &cmajflt,
+ &utime, &stime, &cutime, &cstime, &priority, &nice,
+ &timeout, &itrealvalue, &starttime, &vsize, &rss,
+ &f1, &f2, &f3, &f4, &f5 ,&f6, &f7, &f8, &f9, &f10, &f11,
+ &f12, &f13, &f14, &lastcpu);
+ /* There are some additional fields, which we do not scan or use */
+ if ((nvals < 37) || (rss < 0))
+ return 0;
+
+ /* If current pid corresponds to a Light Weight Process
+ * (Thread POSIX) skip it, we will only account the original
+ * process (pid==tgid)
+ */
+ if (_is_a_lwp(prec->pid) > 0)
+ return 0;
+
+ /* Copy the values that slurm records into our data structure */
+// prec->last_cpu = lastcpu;
+ return 1;
+}
+
+static void _task_sleep(int rem)
+{
+ while (rem)
+ rem = sleep(rem); /* subject to interupt */
+}
+
+/* _watch_tasks() -- monitor slurm jobs and track their memory usage
+ *
+ * IN, OUT: Irrelevant; this is invoked by pthread_create()
+ */
+
+static void *_watch_tasks(void *arg)
+{
+ /* Give chance for processes to spawn before starting
+ * the polling. This should largely eliminate the
+ * the chance of having /proc open when the tasks are
+ * spawned, which would prevent a valid checkpoint/restart
+ * with some systems */
+ _task_sleep(1);
+
+ while (!jobacct_shutdown) { /* Do this until shutdown is requested */
+ if (!jobacct_suspended)
+ _get_process_data(); /* Update the data */
+ _task_sleep(freq);
+ }
+ return NULL;
+}
+
+
+static void _destroy_prec(void *object)
+{
+ prec_t *prec = (prec_t *)object;
+ xfree(prec);
+ return;
+}
+
+/*
+ * init() is called when the plugin is loaded, before any other functions
+ * are called. Put global initialization here.
+ */
+extern int init (void)
+{
+ char *temp;
+
+ /* If running on the slurmctld don't do any of this since it
+ isn't needed.
+ */
+ if (bg_recover == NOT_FROM_CONTROLLER) {
+ /* read cgroup configuration */
+ if (read_slurm_cgroup_conf(&slurm_cgroup_conf))
+ return SLURM_ERROR;
+
+ /* initialize cpuinfo internal data */
+ if (xcpuinfo_init() != XCPUINFO_SUCCESS) {
+ free_slurm_cgroup_conf(&slurm_cgroup_conf);
+ return SLURM_ERROR;
+ }
+
+ /* enable cpuacct cgroup subsystem */
+ if (jobacct_gather_cgroup_cpuacct_init(&slurm_cgroup_conf) !=
+ SLURM_SUCCESS) {
+ xcpuinfo_fini();
+ free_slurm_cgroup_conf(&slurm_cgroup_conf);
+ return SLURM_ERROR;
+ }
+
+ /* enable memory cgroup subsystem */
+ if (jobacct_gather_cgroup_memory_init(&slurm_cgroup_conf) !=
+ SLURM_SUCCESS) {
+ xcpuinfo_fini();
+ free_slurm_cgroup_conf(&slurm_cgroup_conf);
+ return SLURM_ERROR;
+ }
+ info("WARNING: The %s plugin is experimental, and should "
+ "not currently be used in production environments.",
+ plugin_name);
+ }
+
+ temp = slurm_get_proctrack_type();
+ if (!strcasecmp(temp, "proctrack/pgid")) {
+ info("WARNING: We will use a much slower algorithm with "
+ "proctrack/pgid, use Proctracktype=proctrack/linuxproc "
+ "or Proctracktype=proctrack/rms with %s",
+ plugin_name);
+ pgid_plugin = true;
+ }
+
+ xfree(temp);
+ temp = slurm_get_accounting_storage_type();
+ if (!strcasecmp(temp, ACCOUNTING_STORAGE_TYPE_NONE)) {
+ error("WARNING: Even though we are collecting accounting "
+ "information you have asked for it not to be stored "
+ "(%s) if this is not what you have in mind you will "
+ "need to change it.", ACCOUNTING_STORAGE_TYPE_NONE);
+ }
+ xfree(temp);
+
+ verbose("%s loaded", plugin_name);
+ return SLURM_SUCCESS;
+}
+
+extern int fini (void)
+{
+ jobacct_gather_cgroup_cpuacct_fini(&slurm_cgroup_conf);
+ jobacct_gather_cgroup_memory_fini(&slurm_cgroup_conf);
+
+ /* unload configuration */
+ free_slurm_cgroup_conf(&slurm_cgroup_conf);
+ return SLURM_SUCCESS;
+}
+
+extern struct jobacctinfo *jobacct_gather_p_create(jobacct_id_t *jobacct_id)
+{
+ return jobacct_common_alloc_jobacct(jobacct_id);
+}
+
+extern void jobacct_gather_p_destroy(struct jobacctinfo *jobacct)
+{
+ jobacct_common_free_jobacct(jobacct);
+}
+
+extern int jobacct_gather_p_setinfo(struct jobacctinfo *jobacct,
+ enum jobacct_data_type type, void *data)
+{
+ return jobacct_common_setinfo(jobacct, type, data);
+}
+
+extern int jobacct_gather_p_getinfo(struct jobacctinfo *jobacct,
+ enum jobacct_data_type type, void *data)
+{
+ return jobacct_common_getinfo(jobacct, type, data);
+}
+
+extern void jobacct_gather_p_pack(struct jobacctinfo *jobacct,
+ uint16_t rpc_version, Buf buffer)
+{
+ jobacct_common_pack(jobacct, rpc_version, buffer);
+}
+
+extern int jobacct_gather_p_unpack(struct jobacctinfo **jobacct,
+ uint16_t rpc_version, Buf buffer)
+{
+ return jobacct_common_unpack(jobacct, rpc_version, buffer);
+}
+
+extern void jobacct_gather_p_aggregate(struct jobacctinfo *dest,
+ struct jobacctinfo *from)
+{
+ jobacct_common_aggregate(dest, from);
+}
+
+/*
+ * jobacct_startpoll() is called when the plugin is loaded by
+ * slurmd, before any other functions are called. Put global
+ * initialization here.
+ */
+
+extern int jobacct_gather_p_startpoll(uint16_t frequency)
+{
+ int rc = SLURM_SUCCESS;
+
+ pthread_attr_t attr;
+ pthread_t _watch_tasks_thread_id;
+
+ debug("%s loaded", plugin_name);
+
+ debug("jobacct-gather: frequency = %d", frequency);
+
+ jobacct_shutdown = false;
+
+ task_list = list_create(jobacct_common_free_jobacct);
+
+ if (frequency == 0) { /* don't want dynamic monitoring? */
+ debug2("jobacct-gather cgroup dynamic logging disabled");
+ return rc;
+ }
+
+ freq = frequency;
+ /* create polling thread */
+ slurm_attr_init(&attr);
+ if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
+ error("pthread_attr_setdetachstate error %m");
+
+ if (pthread_create(&_watch_tasks_thread_id, &attr,
+ &_watch_tasks, NULL)) {
+ debug("jobacct-gather failed to create _watch_tasks "
+ "thread: %m");
+ frequency = 0;
+ }
+ else
+ debug3("jobacct-gather cgroup dynamic logging enabled");
+ slurm_attr_destroy(&attr);
+
+ return rc;
+}
+
+extern int jobacct_gather_p_endpoll()
+{
+ jobacct_shutdown = true;
+ slurm_mutex_lock(&jobacct_lock);
+ if (task_list)
+ list_destroy(task_list);
+ task_list = NULL;
+ slurm_mutex_unlock(&jobacct_lock);
+
+ if (slash_proc) {
+ slurm_mutex_lock(&reading_mutex);
+ (void) closedir(slash_proc);
+ slurm_mutex_unlock(&reading_mutex);
+ }
+
+
+ return SLURM_SUCCESS;
+}
+
+extern void jobacct_gather_p_change_poll(uint16_t frequency)
+{
+ if (freq == 0 && frequency != 0) {
+ pthread_attr_t attr;
+ pthread_t _watch_tasks_thread_id;
+ /* create polling thread */
+ slurm_attr_init(&attr);
+ if (pthread_attr_setdetachstate(&attr,
+ PTHREAD_CREATE_DETACHED))
+ error("pthread_attr_setdetachstate error %m");
+
+ if (pthread_create(&_watch_tasks_thread_id, &attr,
+ &_watch_tasks, NULL)) {
+ debug("jobacct-gather failed to create _watch_tasks "
+ "thread: %m");
+ frequency = 0;
+ } else
+ debug3("jobacct-gather cgroup dynamic logging "
+ "enabled");
+ slurm_attr_destroy(&attr);
+ jobacct_shutdown = false;
+ }
+
+ freq = frequency;
+ debug("jobacct-gather: frequency changed = %d", frequency);
+ if (freq == 0)
+ jobacct_shutdown = true;
+ return;
+}
+
+extern void jobacct_gather_p_suspend_poll()
+{
+ jobacct_suspended = true;
+}
+
+extern void jobacct_gather_p_resume_poll()
+{
+ jobacct_suspended = false;
+}
+
+extern int jobacct_gather_p_set_proctrack_container_id(uint64_t id)
+{
+ if(pgid_plugin)
+ return SLURM_SUCCESS;
+
+ if (cont_id != (uint64_t)NO_VAL)
+ info("Warning: jobacct: set_proctrack_container_id: cont_id "
+ "is already set to %"PRIu64" you are setting it to "
+ "%"PRIu64"", cont_id, id);
+ if (id <= 0) {
+ error("jobacct: set_proctrack_container_id: "
+ "I was given most likely an unset cont_id %"PRIu64"",
+ id);
+ return SLURM_ERROR;
+ }
+ cont_id = id;
+ return SLURM_SUCCESS;
+}
+
+extern int jobacct_gather_p_add_task(pid_t pid, jobacct_id_t *jobacct_id)
+{
+ int rc;
+
+ if (jobacct_shutdown)
+ return SLURM_ERROR;
+ if ((rc = jobacct_common_add_task(pid, jobacct_id, task_list)) !=
+ SLURM_SUCCESS)
+ return SLURM_ERROR;
+
+ if (jobacct_gather_cgroup_cpuacct_attach_task(pid, jobacct_id) !=
+ SLURM_SUCCESS)
+ return SLURM_ERROR;
+
+ if (jobacct_gather_cgroup_memory_attach_task(pid, jobacct_id) !=
+ SLURM_SUCCESS)
+ return SLURM_ERROR;
+
+ return rc;
+}
+
+
+extern struct jobacctinfo *jobacct_gather_p_stat_task(pid_t pid)
+{
+ if (jobacct_shutdown)
+ return NULL;
+ else if(pid) {
+ _get_process_data();
+ return jobacct_common_stat_task(pid, task_list);
+ } else {
+ /* In this situation, we are just trying to get a
+ * basis of information since we are not pollng. So
+ * we will give a chance for processes to spawn before we
+ * gather information. This should largely eliminate the
+ * the chance of having /proc open when the tasks are
+ * spawned, which would prevent a valid checkpoint/restart
+ * with some systems */
+ _task_sleep(1);
+ _get_process_data();
+ return NULL;
+ }
+}
+
+extern struct jobacctinfo *jobacct_gather_p_remove_task(pid_t pid)
+{
+ if (jobacct_shutdown)
+ return NULL;
+ return jobacct_common_remove_task(pid, task_list);
+}
+
+extern void jobacct_gather_p_2_stats(slurmdb_stats_t *stats,
+ struct jobacctinfo *jobacct)
+{
+ jobacct_common_2_stats(stats, jobacct);
+}
+
+extern char* jobacct_cgroup_create_slurm_cg(xcgroup_ns_t* ns)
+ {
+ /* we do it here as we do not have access to the conf structure */
+ /* in libslurm (src/common/xcgroup.c) */
+ xcgroup_t slurm_cg;
+ char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend);
+#ifdef MULTIPLE_SLURMD
+ if (conf->node_name != NULL)
+ xstrsubstitute(pre,"%n", conf->node_name);
+ else {
+ xfree(pre);
+ pre = (char*) xstrdup("/slurm");
+ }
+#endif
+
+ /* create slurm cgroup in the ns (it could already exist) */
+ if (xcgroup_create(ns,&slurm_cg,pre,
+ getuid(), getgid()) != XCGROUP_SUCCESS) {
+ return pre;
+ }
+
+ if (xcgroup_instanciate(&slurm_cg) != XCGROUP_SUCCESS) {
+ error("unable to build slurm cgroup for ns %s: %m",
+ ns->subsystems);
+ xcgroup_destroy(&slurm_cg);
+ return pre;
+ } else {
+ debug3("slurm cgroup %s successfully created for ns %s: %m",
+ pre,ns->subsystems);
+ xcgroup_destroy(&slurm_cg);
+ }
+
+ return pre;
+}
+
diff --git a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h
new file mode 100644
index 0000000..470883b
--- /dev/null
+++ b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h
@@ -0,0 +1,68 @@
+/*****************************************************************************\
+ * jobacct_gather_cgroup.h - slurm job accounting gather plugin for cgroup.
+ *****************************************************************************
+ * Copyright (C) 2011 Bull.
+ * Written by Martin Perry, <martin.perry@bull.com>, who borrowed heavily
+ * from other parts of SLURM
+ * CODE-OCEC-09-009. All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * This file is patterned after jobcomp_linux.c, written by Morris Jette and
+ * Copyright (C) 2002 The Regents of the University of California.
+\*****************************************************************************/
+
+#include "src/common/xcgroup.h"
+#include "src/common/xcpuinfo.h"
+#include "src/common/jobacct_common.h"
+#include "src/common/xcgroup_read_config.h"
+
+extern xcgroup_t task_memory_cg;
+extern xcgroup_t task_cpuacct_cg;
+
+extern int jobacct_gather_cgroup_cpuacct_init(
+ slurm_cgroup_conf_t *slurm_cgroup_conf);
+
+extern int jobacct_gather_cgroup_cpuacct_fini(
+ slurm_cgroup_conf_t *slurm_cgroup_conf);
+
+extern int jobacct_gather_cgroup_cpuacct_attach_task(
+ pid_t pid, jobacct_id_t *jobacct_id);
+
+extern int jobacct_gather_cgroup_memory_init(
+ slurm_cgroup_conf_t *slurm_cgroup_conf);
+
+extern int jobacct_gather_cgroup_memory_fini(
+ slurm_cgroup_conf_t *slurm_cgroup_conf);
+
+extern int jobacct_gather_cgroup_memory_attach_task(
+ pid_t pid, jobacct_id_t *jobacct_id);
+
+extern char* jobacct_cgroup_create_slurm_cg (xcgroup_ns_t* ns);
diff --git a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_cpuacct.c b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_cpuacct.c
new file mode 100644
index 0000000..449d464
--- /dev/null
+++ b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_cpuacct.c
@@ -0,0 +1,357 @@
+/**************************************************************************** \
+ * jobacct_gather_cgroup_cpuacct.c - cpuacct cgroup subsystem for
+ * jobacct_gather/cgroup
+ *****************************************************************************
+ * Copyright (C) 2011 Bull
+ * Written by Martin Perry (martin.perry@bull.com) based on code from
+ * Matthieu Hautreux
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <stdlib.h> /* getenv */
+
+#include "slurm/slurm_errno.h"
+#include "slurm/slurm.h"
+#include "src/common/xstring.h"
+#include "src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+#include "src/slurmd/slurmd/slurmd.h"
+
+#ifndef PATH_MAX
+#define PATH_MAX 256
+#endif
+
+static char user_cgroup_path[PATH_MAX];
+static char job_cgroup_path[PATH_MAX];
+static char jobstep_cgroup_path[PATH_MAX];
+static char task_cgroup_path[PATH_MAX];
+
+static xcgroup_ns_t cpuacct_ns;
+
+static xcgroup_t user_cpuacct_cg;
+static xcgroup_t job_cpuacct_cg;
+static xcgroup_t step_cpuacct_cg;
+xcgroup_t task_cpuacct_cg;
+
+extern int jobacct_gather_cgroup_cpuacct_init(
+ slurm_cgroup_conf_t *slurm_cgroup_conf)
+{
+ char release_agent_path[PATH_MAX];
+
+ /* initialize user/job/jobstep cgroup relative paths */
+ user_cgroup_path[0]='\0';
+ job_cgroup_path[0]='\0';
+ jobstep_cgroup_path[0]='\0';
+
+ /* initialize cpuacct cgroup namespace */
+ release_agent_path[0]='\0';
+ if (snprintf(release_agent_path, PATH_MAX, "%s/release_cpuacct",
+ slurm_cgroup_conf->cgroup_release_agent) >= PATH_MAX) {
+ error("jobacct_gather/cgroup: unable to build cpuacct release "
+ "agent path");
+ goto error;
+ }
+
+ if (xcgroup_ns_create(slurm_cgroup_conf, &cpuacct_ns, "/cpuacct", "",
+ "cpuacct", release_agent_path) !=
+ XCGROUP_SUCCESS) {
+ error("jobacct_gather/cgroup: unable to create cpuacct "
+ "namespace");
+ goto error;
+ }
+
+ /* check that cpuacct cgroup namespace is available */
+ if (!xcgroup_ns_is_available(&cpuacct_ns)) {
+ if (slurm_cgroup_conf->cgroup_automount) {
+ if (xcgroup_ns_mount(&cpuacct_ns)) {
+ error("jobacct_gather/cgroup: unable to mount "
+ "cpuacct namespace: %s",
+ slurm_strerror(errno));
+ goto clean;
+ }
+ info("jobacct_gather/cgroup: cpuacct namespace is now "
+ "mounted");
+ } else {
+ error("jobacct_gather/cgroup: cpuacct namespace not "
+ "mounted. aborting");
+ goto clean;
+ }
+ }
+ return SLURM_SUCCESS;
+
+clean:
+ xcgroup_ns_destroy(&cpuacct_ns);
+
+error:
+ return SLURM_ERROR;
+}
+
+extern int jobacct_gather_cgroup_cpuacct_fini(
+ slurm_cgroup_conf_t *slurm_cgroup_conf)
+{
+ xcgroup_t cpuacct_cg;
+
+ if (user_cgroup_path[0] == '\0' ||
+ job_cgroup_path[0] == '\0' ||
+ jobstep_cgroup_path[0] == '\0')
+ return SLURM_SUCCESS;
+
+ /*
+ * Move the slurmstepd back to the root cpuacct cg.
+ * The release_agent will asynchroneously be called for the step
+ * cgroup. It will do the necessary cleanup.
+ */
+ if (xcgroup_create(&cpuacct_ns, &cpuacct_cg, "", 0, 0)
+ == XCGROUP_SUCCESS) {
+ xcgroup_set_uint32_param(&cpuacct_cg, "tasks", getpid());
+ xcgroup_destroy(&cpuacct_cg);
+ }
+
+ xcgroup_destroy(&user_cpuacct_cg);
+ xcgroup_destroy(&job_cpuacct_cg);
+ xcgroup_destroy(&step_cpuacct_cg);
+
+ user_cgroup_path[0]='\0';
+ job_cgroup_path[0]='\0';
+ jobstep_cgroup_path[0]='\0';
+ xcgroup_ns_destroy(&cpuacct_ns);
+
+ return SLURM_SUCCESS;
+}
+
+extern int jobacct_gather_cgroup_cpuacct_attach_task(
+ pid_t pid, jobacct_id_t *jobacct_id)
+{
+ xcgroup_t cpuacct_cg;
+ slurmd_job_t *job;
+ uid_t uid;
+ gid_t gid;
+ uint32_t jobid;
+ uint32_t stepid;
+ uint32_t taskid;
+ int fstatus = SLURM_SUCCESS;
+ int rc;
+ char* slurm_cgpath;
+
+ job = jobacct_id->job;
+ uid = job->uid;
+ gid = job->gid;
+ jobid = job->jobid;
+ stepid = job->stepid;
+ taskid = jobacct_id->taskid;
+
+ /* create slurm root cg in this cg namespace */
+ slurm_cgpath = jobacct_cgroup_create_slurm_cg(&cpuacct_ns);
+ if (!slurm_cgpath) {
+ return SLURM_ERROR;
+ }
+
+ /* build user cgroup relative path if not set (may not be) */
+ if (*user_cgroup_path == '\0') {
+ if (snprintf(user_cgroup_path, PATH_MAX,
+ "%s/uid_%u", slurm_cgpath, uid) >= PATH_MAX) {
+ error("jobacct_gather/cgroup: unable to build uid %u "
+ "cgroup relative path", uid);
+ xfree(slurm_cgpath);
+ return SLURM_ERROR;
+ }
+ }
+
+ /* build job cgroup relative path if not set (may not be) */
+ if (*job_cgroup_path == '\0') {
+ if (snprintf(job_cgroup_path, PATH_MAX, "%s/job_%u",
+ user_cgroup_path, jobid) >= PATH_MAX) {
+ error("jobacct_gather/cgroup: unable to build job %u "
+ "cpuacct cg relative path : %m", jobid);
+ return SLURM_ERROR;
+ }
+ }
+
+ /* build job step cgroup relative path if not set (may not be) */
+ if (*jobstep_cgroup_path == '\0') {
+ if (snprintf(jobstep_cgroup_path, PATH_MAX, "%s/step_%u",
+ job_cgroup_path, stepid) >= PATH_MAX) {
+ error("jobacct_gather/cgroup: unable to build job step "
+ "%u cpuacct cg relative path : %m", stepid);
+ return SLURM_ERROR;
+ }
+ }
+
+ /* build task cgroup relative path */
+ if (snprintf(task_cgroup_path, PATH_MAX, "%s/task_%u",
+ jobstep_cgroup_path, taskid) >= PATH_MAX) {
+ error("jobacct_gather/cgroup: unable to build task %u "
+ "cpuacct cg relative path : %m", taskid);
+ return SLURM_ERROR;
+ }
+
+ fstatus = SLURM_SUCCESS;
+
+ /*
+ * create cpuacct root cg and lock it
+ *
+ * we will keep the lock until the end to avoid the effect of a release
+ * agent that would remove an existing cgroup hierarchy while we are
+ * setting it up. As soon as the step cgroup is created, we can release
+ * the lock.
+ * Indeed, consecutive slurm steps could result in cg being removed
+ * between the next EEXIST instanciation and the first addition of
+ * a task. The release_agent will have to lock the root cpuacct cgroup
+ * to avoid this scenario.
+ */
+
+ if (xcgroup_create(&cpuacct_ns, &cpuacct_cg, "", 0, 0)
+ != XCGROUP_SUCCESS) {
+ error("jobacct_gather/cgroup: unable to create root cpuacct "
+ "xcgroup");
+ return SLURM_ERROR;
+ }
+ if (xcgroup_lock(&cpuacct_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&cpuacct_cg);
+ error("jobacct_gather/cgroup: unable to lock root cpuacct cg");
+ return SLURM_ERROR;
+ }
+
+ /*
+ * Create user cgroup in the cpuacct ns (it could already exist)
+ */
+ if (xcgroup_create(&cpuacct_ns, &user_cpuacct_cg,
+ user_cgroup_path,
+ uid, gid) != XCGROUP_SUCCESS) {
+ error("jobacct_gather/cgroup: unable to create user %u cpuacct "
+ "cgroup", uid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ if (xcgroup_instanciate(&user_cpuacct_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_cpuacct_cg);
+ error("jobacct_gather/cgroup: unable to instanciate user %u "
+ "cpuacct cgroup", uid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ /*
+ * Create job cgroup in the cpuacct ns (it could already exist)
+ */
+ if (xcgroup_create(&cpuacct_ns, &job_cpuacct_cg,
+ job_cgroup_path,
+ uid, gid) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_cpuacct_cg);
+ error("jobacct_gather/cgroup: unable to create job %u cpuacct "
+ "cgroup", jobid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ if (xcgroup_instanciate(&job_cpuacct_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_cpuacct_cg);
+ xcgroup_destroy(&job_cpuacct_cg);
+ error("jobacct_gather/cgroup: unable to instanciate job %u "
+ "cpuacct cgroup", jobid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ /*
+ * Create step cgroup in the cpuacct ns (it could already exist)
+ */
+ if (xcgroup_create(&cpuacct_ns, &step_cpuacct_cg,
+ jobstep_cgroup_path,
+ uid, gid) != XCGROUP_SUCCESS) {
+ /* do not delete user/job cgroup as they can exist for other
+ * steps, but release cgroup structures */
+ xcgroup_destroy(&user_cpuacct_cg);
+ xcgroup_destroy(&job_cpuacct_cg);
+ error("jobacct_gather/cgroup: unable to create jobstep %u.%u "
+ "cpuacct cgroup", jobid, stepid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ if (xcgroup_instanciate(&step_cpuacct_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_cpuacct_cg);
+ xcgroup_destroy(&job_cpuacct_cg);
+ xcgroup_destroy(&step_cpuacct_cg);
+ error("jobacct_gather/cgroup: unable to instantiate jobstep "
+ "%u.%u cpuacct cgroup", jobid, stepid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ /*
+ * Create task cgroup in the cpuacct ns
+ */
+ if (xcgroup_create(&cpuacct_ns, &task_cpuacct_cg,
+ task_cgroup_path,
+ uid, gid) != XCGROUP_SUCCESS) {
+ /* do not delete user/job cgroup as they can exist for other
+ * steps, but release cgroup structures */
+ xcgroup_destroy(&user_cpuacct_cg);
+ xcgroup_destroy(&job_cpuacct_cg);
+ error("jobacct_gather/cgroup: unable to create jobstep %u.%u "
+ "task %u cpuacct cgroup", jobid, stepid, taskid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ if (xcgroup_instanciate(&task_cpuacct_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_cpuacct_cg);
+ xcgroup_destroy(&job_cpuacct_cg);
+ xcgroup_destroy(&step_cpuacct_cg);
+ error("jobacct_gather/cgroup: unable to instantiate jobstep "
+ "%u.%u task %u cpuacct cgroup", jobid, stepid, taskid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ /*
+ * Attach the slurmstepd to the task cpuacct cgroup
+ */
+ rc = xcgroup_add_pids(&task_cpuacct_cg, &pid, 1);
+ if (rc != XCGROUP_SUCCESS) {
+ error("jobacct_gather/cgroup: unable to add slurmstepd to "
+ "cpuacct cg '%s'", task_cpuacct_cg.path);
+ fstatus = SLURM_ERROR;
+ } else
+ fstatus = SLURM_SUCCESS;
+
+error:
+ xcgroup_unlock(&cpuacct_cg);
+ xcgroup_destroy(&cpuacct_cg);
+ return fstatus;
+}
diff --git a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_memory.c b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_memory.c
new file mode 100644
index 0000000..e311faa
--- /dev/null
+++ b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup_memory.c
@@ -0,0 +1,367 @@
+/*****************************************************************************\
+ * jobacct_gather_cgroup_memory.c - memory cgroup subsystem for
+ * jobacct_gather/cgroup
+ *****************************************************************************
+ * Copyright (C) 2011 Bull
+ * Written by Martin Perry (martin.perry@bull.com) based on code from
+ * Matthieu Hautreux
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <stdlib.h> /* getenv */
+
+#include "slurm/slurm_errno.h"
+#include "slurm/slurm.h"
+#include "src/common/xstring.h"
+#include "src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+#include "src/slurmd/slurmd/slurmd.h"
+
+#ifndef PATH_MAX
+#define PATH_MAX 256
+#endif
+
+static char user_cgroup_path[PATH_MAX];
+static char job_cgroup_path[PATH_MAX];
+static char jobstep_cgroup_path[PATH_MAX];
+static char task_cgroup_path[PATH_MAX];
+
+static xcgroup_ns_t memory_ns;
+
+static xcgroup_t user_memory_cg;
+static xcgroup_t job_memory_cg;
+static xcgroup_t step_memory_cg;
+xcgroup_t task_memory_cg;
+
+
+extern int jobacct_gather_cgroup_memory_init(
+ slurm_cgroup_conf_t *slurm_cgroup_conf)
+{
+ char release_agent_path[PATH_MAX];
+
+ /* initialize user/job/jobstep cgroup relative paths */
+ user_cgroup_path[0]='\0';
+ job_cgroup_path[0]='\0';
+ jobstep_cgroup_path[0]='\0';
+
+ /* initialize memory cgroup namespace */
+ release_agent_path[0]='\0';
+ if (snprintf(release_agent_path, PATH_MAX, "%s/release_memory",
+ slurm_cgroup_conf->cgroup_release_agent) >= PATH_MAX) {
+ error("jobacct_gather/cgroup: unable to build memory release "
+ "agent path");
+ goto error;
+ }
+ if (xcgroup_ns_create(slurm_cgroup_conf, &memory_ns, "/memory", "",
+ "memory", release_agent_path) !=
+ XCGROUP_SUCCESS) {
+ error("jobacct_gather/cgroup: unable to create memory "
+ "namespace");
+ goto error;
+ }
+
+ /* check that memory cgroup namespace is available */
+ if (!xcgroup_ns_is_available(&memory_ns)) {
+ if (slurm_cgroup_conf->cgroup_automount) {
+ if (xcgroup_ns_mount(&memory_ns)) {
+ error("jobacct_gather/cgroup: unable to mount "
+ "memory namespace: %s",
+ slurm_strerror(errno));
+ goto clean;
+ }
+ info("jobacct_gather/cgroup: memory namespace is now "
+ "mounted");
+ } else {
+ error("jobacct_gather/cgroup: memory namespace not "
+ "mounted. aborting");
+ goto clean;
+ }
+ }
+ return SLURM_SUCCESS;
+
+clean:
+ xcgroup_ns_destroy(&memory_ns);
+
+error:
+ return SLURM_ERROR;
+}
+
+extern int jobacct_gather_cgroup_memory_fini(
+ slurm_cgroup_conf_t *slurm_cgroup_conf)
+{
+ xcgroup_t memory_cg;
+
+ if (user_cgroup_path[0] == '\0' ||
+ job_cgroup_path[0] == '\0' ||
+ jobstep_cgroup_path[0] == '\0')
+ return SLURM_SUCCESS;
+
+ /*
+ * Move the slurmstepd back to the root memory cg and force empty
+ * the step cgroup to move its allocated pages to its parent.
+ * The release_agent will asynchronously be called for the step
+ * cgroup. It will do the necessary cleanup.
+ * It should be good if this force_empty mech could be done directly
+ * by the memcg implementation at the end of the last task managed
+ * by a cgroup. It is too difficult, and nearly impossible, to handle
+ * that cleanup correctly with the current memcg implementation.
+ */
+ if (xcgroup_create(&memory_ns, &memory_cg, "", 0, 0)
+ == XCGROUP_SUCCESS) {
+ xcgroup_set_uint32_param(&memory_cg, "tasks", getpid());
+ xcgroup_destroy(&memory_cg);
+ xcgroup_set_param(&step_memory_cg, "memory.force_empty", "1");
+ }
+
+ xcgroup_destroy(&user_memory_cg);
+ xcgroup_destroy(&job_memory_cg);
+ xcgroup_destroy(&step_memory_cg);
+
+ user_cgroup_path[0]='\0';
+ job_cgroup_path[0]='\0';
+ jobstep_cgroup_path[0]='\0';
+
+ xcgroup_ns_destroy(&memory_ns);
+
+ return SLURM_SUCCESS;
+}
+
+extern int jobacct_gather_cgroup_memory_attach_task(
+ pid_t pid, jobacct_id_t *jobacct_id)
+{
+ xcgroup_t memory_cg;
+ slurmd_job_t *job;
+ uid_t uid;
+ gid_t gid;
+ uint32_t jobid;
+ uint32_t stepid;
+ uint32_t taskid;
+ int fstatus = SLURM_SUCCESS;
+ int rc;
+ char* slurm_cgpath;
+
+ job = jobacct_id->job;
+ uid = job->uid;
+ gid = job->gid;
+ jobid = job->jobid;
+ stepid = job->stepid;
+ taskid = jobacct_id->taskid;
+
+ /* create slurm root cg in this cg namespace */
+ slurm_cgpath = jobacct_cgroup_create_slurm_cg(&memory_ns);
+ if (!slurm_cgpath) {
+ return SLURM_ERROR;
+ }
+
+ /* build user cgroup relative path if not set (should not be) */
+ if (*user_cgroup_path == '\0') {
+ if (snprintf(user_cgroup_path, PATH_MAX,
+ "%s/uid_%u", slurm_cgpath, uid) >= PATH_MAX) {
+ error("unable to build uid %u cgroup relative "
+ "path : %m", uid);
+ xfree(slurm_cgpath);
+ return SLURM_ERROR;
+ }
+ }
+
+ /* build job cgroup relative path if not set (may not be) */
+ if (*job_cgroup_path == '\0') {
+ if (snprintf(job_cgroup_path, PATH_MAX, "%s/job_%u",
+ user_cgroup_path, jobid) >= PATH_MAX) {
+ error("jobacct_gather/cgroup: unable to build job %u "
+ "memory cg relative path : %m", jobid);
+ return SLURM_ERROR;
+ }
+ }
+
+ /* build job step cgroup relative path if not set (may not be) */
+ if (*jobstep_cgroup_path == '\0') {
+ if (snprintf(jobstep_cgroup_path, PATH_MAX, "%s/step_%u",
+ job_cgroup_path, stepid) >= PATH_MAX) {
+ error("jobacct_gather/cgroup: unable to build job step "
+ "%u memory cg relative path : %m", stepid);
+ return SLURM_ERROR;
+ }
+ }
+
+ /* build task cgroup relative path */
+ if (snprintf(task_cgroup_path, PATH_MAX, "%s/task_%u",
+ jobstep_cgroup_path, taskid) >= PATH_MAX) {
+ error("jobacct_gather/cgroup: unable to build task %u "
+ "memory cg relative path : %m", taskid);
+ return SLURM_ERROR;
+ }
+
+ fstatus = SLURM_SUCCESS;
+
+ /*
+ * create memory root cg and lock it
+ *
+ * we will keep the lock until the end to avoid the effect of a release
+ * agent that would remove an existing cgroup hierarchy while we are
+ * setting it up. As soon as the step cgroup is created, we can release
+ * the lock.
+ * Indeed, consecutive slurm steps could result in cg being removed
+ * between the next EEXIST instantiation and the first addition of
+ * a task. The release_agent will have to lock the root memory cgroup
+ * to avoid this scenario.
+ */
+
+ if (xcgroup_create(&memory_ns, &memory_cg, "", 0, 0)
+ != XCGROUP_SUCCESS) {
+ error("jobacct_gather/cgroup: unable to create root memory "
+ "xcgroup");
+ return SLURM_ERROR;
+ }
+ if (xcgroup_lock(&memory_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&memory_cg);
+ error("jobacct_gather/cgroup: unable to lock root memory cg");
+ return SLURM_ERROR;
+ }
+
+ /*
+ * Create user cgroup in the memory ns (it could already exist)
+ * Ask for hierarchical memory accounting starting from the user
+ * container in order to track the memory consumption up to the
+ * user.
+ */
+ if (xcgroup_create(&memory_ns, &user_memory_cg,
+ user_cgroup_path,
+ uid, gid) != XCGROUP_SUCCESS) {
+ error("jobacct_gather/cgroup: unable to create user %u memory "
+ "cgroup", uid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ if (xcgroup_instanciate(&user_memory_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_memory_cg);
+ error("jobacct_gather/cgroup: unable to instanciate user %u "
+ "memory cgroup", uid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ /*
+ * Create job cgroup in the memory ns (it could already exist)
+ */
+ if (xcgroup_create(&memory_ns, &job_memory_cg,
+ job_cgroup_path,
+ uid, gid) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_memory_cg);
+ error("jobacct_gather/cgroup: unable to create job %u memory "
+ "cgroup", jobid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ if (xcgroup_instanciate(&job_memory_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_memory_cg);
+ xcgroup_destroy(&job_memory_cg);
+ error("jobacct_gather/cgroup: unable to instanciate job %u "
+ "memory cgroup", jobid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ /*
+ * Create step cgroup in the memory ns (it could already exist)
+ */
+ if (xcgroup_create(&memory_ns, &step_memory_cg,
+ jobstep_cgroup_path,
+ uid, gid) != XCGROUP_SUCCESS) {
+ /* do not delete user/job cgroup as they can exist for other
+ * steps, but release cgroup structures */
+ xcgroup_destroy(&user_memory_cg);
+ xcgroup_destroy(&job_memory_cg);
+ error("jobacct_gather/cgroup: unable to create jobstep %u.%u "
+ "memory cgroup", jobid, stepid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ if (xcgroup_instanciate(&step_memory_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_memory_cg);
+ xcgroup_destroy(&job_memory_cg);
+ xcgroup_destroy(&step_memory_cg);
+ error("jobacct_gather/cgroup: unable to instantiate jobstep "
+ "%u.%u memory cgroup", jobid, stepid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ /*
+ * Create task cgroup in the memory ns
+ */
+ if (xcgroup_create(&memory_ns, &task_memory_cg,
+ task_cgroup_path,
+ uid, gid) != XCGROUP_SUCCESS) {
+ /* do not delete user/job cgroup as they can exist for other
+ * steps, but release cgroup structures */
+ xcgroup_destroy(&user_memory_cg);
+ xcgroup_destroy(&job_memory_cg);
+ error("jobacct_gather/cgroup: unable to create jobstep %u.%u "
+ "task %u memory cgroup", jobid, stepid, taskid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ if (xcgroup_instanciate(&task_memory_cg) != XCGROUP_SUCCESS) {
+ xcgroup_destroy(&user_memory_cg);
+ xcgroup_destroy(&job_memory_cg);
+ xcgroup_destroy(&step_memory_cg);
+ error("jobacct_gather/cgroup: unable to instantiate jobstep "
+ "%u.%u task %u memory cgroup", jobid, stepid, taskid);
+ fstatus = SLURM_ERROR;
+ goto error;
+ }
+
+ /*
+ * Attach the slurmstepd to the task memory cgroup
+ */
+ rc = xcgroup_add_pids(&task_memory_cg, &pid, 1);
+ if (rc != XCGROUP_SUCCESS) {
+ error("jobacct_gather/cgroup: unable to add slurmstepd to "
+ "memory cg '%s'", task_memory_cg.path);
+ fstatus = SLURM_ERROR;
+ } else
+ fstatus = SLURM_SUCCESS;
+
+error:
+ xcgroup_unlock(&memory_cg);
+ xcgroup_destroy(&memory_cg);
+ return fstatus;
+}
diff --git a/src/plugins/jobacct_gather/linux/Makefile.in b/src/plugins/jobacct_gather/linux/Makefile.in
index 0903e64..48a3ebb 100644
--- a/src/plugins/jobacct_gather/linux/Makefile.in
+++ b/src/plugins/jobacct_gather/linux/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
jobacct_gather_linux_la_LIBADD =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -405,7 +413,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-jobacct_gather_linux.la: $(jobacct_gather_linux_la_OBJECTS) $(jobacct_gather_linux_la_DEPENDENCIES)
+jobacct_gather_linux.la: $(jobacct_gather_linux_la_OBJECTS) $(jobacct_gather_linux_la_DEPENDENCIES) $(EXTRA_jobacct_gather_linux_la_DEPENDENCIES)
$(jobacct_gather_linux_la_LINK) -rpath $(pkglibdir) $(jobacct_gather_linux_la_OBJECTS) $(jobacct_gather_linux_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -542,10 +550,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobacct_gather/none/Makefile.in b/src/plugins/jobacct_gather/none/Makefile.in
index b1f3e84..cd9af87 100644
--- a/src/plugins/jobacct_gather/none/Makefile.in
+++ b/src/plugins/jobacct_gather/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
jobacct_gather_none_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-jobacct_gather_none.la: $(jobacct_gather_none_la_OBJECTS) $(jobacct_gather_none_la_DEPENDENCIES)
+jobacct_gather_none.la: $(jobacct_gather_none_la_OBJECTS) $(jobacct_gather_none_la_DEPENDENCIES) $(EXTRA_jobacct_gather_none_la_DEPENDENCIES)
$(jobacct_gather_none_la_LINK) -rpath $(pkglibdir) $(jobacct_gather_none_la_OBJECTS) $(jobacct_gather_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobcomp/Makefile.in b/src/plugins/jobcomp/Makefile.in
index b15c6c4..87b0c5f 100644
--- a/src/plugins/jobcomp/Makefile.in
+++ b/src/plugins/jobcomp/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobcomp/filetxt/Makefile.in b/src/plugins/jobcomp/filetxt/Makefile.in
index c7d3fb5..2fbc2b7 100644
--- a/src/plugins/jobcomp/filetxt/Makefile.in
+++ b/src/plugins/jobcomp/filetxt/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
jobcomp_filetxt_la_LIBADD =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -407,7 +415,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-jobcomp_filetxt.la: $(jobcomp_filetxt_la_OBJECTS) $(jobcomp_filetxt_la_DEPENDENCIES)
+jobcomp_filetxt.la: $(jobcomp_filetxt_la_OBJECTS) $(jobcomp_filetxt_la_DEPENDENCIES) $(EXTRA_jobcomp_filetxt_la_DEPENDENCIES)
$(jobcomp_filetxt_la_LINK) -rpath $(pkglibdir) $(jobcomp_filetxt_la_OBJECTS) $(jobcomp_filetxt_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -545,10 +553,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobcomp/mysql/Makefile.in b/src/plugins/jobcomp/mysql/Makefile.in
index 35d12c2..7f60149 100644
--- a/src/plugins/jobcomp/mysql/Makefile.in
+++ b/src/plugins/jobcomp/mysql/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -217,6 +223,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -253,6 +260,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -423,7 +431,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-jobcomp_mysql.la: $(jobcomp_mysql_la_OBJECTS) $(jobcomp_mysql_la_DEPENDENCIES)
+jobcomp_mysql.la: $(jobcomp_mysql_la_OBJECTS) $(jobcomp_mysql_la_DEPENDENCIES) $(EXTRA_jobcomp_mysql_la_DEPENDENCIES)
$(jobcomp_mysql_la_LINK) $(am_jobcomp_mysql_la_rpath) $(jobcomp_mysql_la_OBJECTS) $(jobcomp_mysql_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -575,10 +583,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobcomp/none/Makefile.in b/src/plugins/jobcomp/none/Makefile.in
index 48b6f47..4c49106 100644
--- a/src/plugins/jobcomp/none/Makefile.in
+++ b/src/plugins/jobcomp/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
jobcomp_none_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-jobcomp_none.la: $(jobcomp_none_la_OBJECTS) $(jobcomp_none_la_DEPENDENCIES)
+jobcomp_none.la: $(jobcomp_none_la_OBJECTS) $(jobcomp_none_la_DEPENDENCIES) $(EXTRA_jobcomp_none_la_DEPENDENCIES)
$(jobcomp_none_la_LINK) -rpath $(pkglibdir) $(jobcomp_none_la_OBJECTS) $(jobcomp_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobcomp/pgsql/Makefile.in b/src/plugins/jobcomp/pgsql/Makefile.in
index 737ff8a..3cfb05a 100644
--- a/src/plugins/jobcomp/pgsql/Makefile.in
+++ b/src/plugins/jobcomp/pgsql/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -217,6 +223,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -253,6 +260,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -424,7 +432,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-jobcomp_pgsql.la: $(jobcomp_pgsql_la_OBJECTS) $(jobcomp_pgsql_la_DEPENDENCIES)
+jobcomp_pgsql.la: $(jobcomp_pgsql_la_OBJECTS) $(jobcomp_pgsql_la_DEPENDENCIES) $(EXTRA_jobcomp_pgsql_la_DEPENDENCIES)
$(jobcomp_pgsql_la_LINK) $(am_jobcomp_pgsql_la_rpath) $(jobcomp_pgsql_la_OBJECTS) $(jobcomp_pgsql_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -576,10 +584,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/jobcomp/script/Makefile.in b/src/plugins/jobcomp/script/Makefile.in
index f8d86a1..e30f886 100644
--- a/src/plugins/jobcomp/script/Makefile.in
+++ b/src/plugins/jobcomp/script/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
jobcomp_script_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-jobcomp_script.la: $(jobcomp_script_la_OBJECTS) $(jobcomp_script_la_DEPENDENCIES)
+jobcomp_script.la: $(jobcomp_script_la_OBJECTS) $(jobcomp_script_la_DEPENDENCIES) $(EXTRA_jobcomp_script_la_DEPENDENCIES)
$(jobcomp_script_la_LINK) -rpath $(pkglibdir) $(jobcomp_script_la_OBJECTS) $(jobcomp_script_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/Makefile.am b/src/plugins/mpi/Makefile.am
index c69678a..dda7bba 100644
--- a/src/plugins/mpi/Makefile.am
+++ b/src/plugins/mpi/Makefile.am
@@ -1,3 +1,3 @@
# Makefile for mpi plugins
-SUBDIRS = mpich1_p4 mpich1_shmem mpichgm mpichmx mvapich none lam openmpi
+SUBDIRS = mpich1_p4 mpich1_shmem mpichgm mpichmx mvapich none lam openmpi pmi2
diff --git a/src/plugins/mpi/Makefile.in b/src/plugins/mpi/Makefile.in
index f810b86..3ba01f3 100644
--- a/src/plugins/mpi/Makefile.in
+++ b/src/plugins/mpi/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -326,7 +328,7 @@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-SUBDIRS = mpich1_p4 mpich1_shmem mpichgm mpichmx mvapich none lam openmpi
+SUBDIRS = mpich1_p4 mpich1_shmem mpichgm mpichmx mvapich none lam openmpi pmi2
all: all-recursive
.SUFFIXES:
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/lam/Makefile.in b/src/plugins/mpi/lam/Makefile.in
index 9bb03dd..cc718e3 100644
--- a/src/plugins/mpi/lam/Makefile.in
+++ b/src/plugins/mpi/lam/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
mpi_lam_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -402,7 +410,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-mpi_lam.la: $(mpi_lam_la_OBJECTS) $(mpi_lam_la_DEPENDENCIES)
+mpi_lam.la: $(mpi_lam_la_OBJECTS) $(mpi_lam_la_DEPENDENCIES) $(EXTRA_mpi_lam_la_DEPENDENCIES)
$(mpi_lam_la_LINK) -rpath $(pkglibdir) $(mpi_lam_la_OBJECTS) $(mpi_lam_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -539,10 +547,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/lam/mpi_lam.c b/src/plugins/mpi/lam/mpi_lam.c
index 835377d..b631242 100644
--- a/src/plugins/mpi/lam/mpi_lam.c
+++ b/src/plugins/mpi/lam/mpi_lam.c
@@ -48,6 +48,7 @@
#include "slurm/slurm_errno.h"
#include "src/common/slurm_xlator.h"
#include "src/plugins/mpi/lam/lam.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
/*
* These variables are required by the generic plugin interface. If they
@@ -80,6 +81,12 @@
const char plugin_type[] = "mpi/lam";
const uint32_t plugin_version = 100;
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+ debug("mpi/lam: slurmstepd prefork");
+ return SLURM_SUCCESS;
+}
+
int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
char ***env)
{
@@ -95,7 +102,7 @@
return (void *)0xdeadbeef;
}
-int p_mpi_hook_client_single_task_per_node()
+int p_mpi_hook_client_single_task_per_node(void)
{
return true;
}
diff --git a/src/plugins/mpi/mpich1_p4/Makefile.in b/src/plugins/mpi/mpich1_p4/Makefile.in
index 63940df..813b7a3 100644
--- a/src/plugins/mpi/mpich1_p4/Makefile.in
+++ b/src/plugins/mpi/mpich1_p4/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
mpi_mpich1_p4_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-mpi_mpich1_p4.la: $(mpi_mpich1_p4_la_OBJECTS) $(mpi_mpich1_p4_la_DEPENDENCIES)
+mpi_mpich1_p4.la: $(mpi_mpich1_p4_la_OBJECTS) $(mpi_mpich1_p4_la_DEPENDENCIES) $(EXTRA_mpi_mpich1_p4_la_DEPENDENCIES)
$(mpi_mpich1_p4_la_LINK) -rpath $(pkglibdir) $(mpi_mpich1_p4_la_OBJECTS) $(mpi_mpich1_p4_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/mpich1_p4/mpich1_p4.c b/src/plugins/mpi/mpich1_p4/mpich1_p4.c
index 201960f..e6b0dca 100644
--- a/src/plugins/mpi/mpich1_p4/mpich1_p4.c
+++ b/src/plugins/mpi/mpich1_p4/mpich1_p4.c
@@ -55,6 +55,7 @@
#include "src/common/net.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
/*
* These variables are required by the generic plugin interface. If they
@@ -103,7 +104,13 @@
static pthread_cond_t shutdown_cond;
-int p_mpi_hook_slurmstepd_task (const mpi_plugin_client_info_t *job,
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+ debug("mpi/mpich1_p4: slurmstepd prefork");
+ return SLURM_SUCCESS;
+}
+
+int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job,
char ***env)
{
char *nodelist, *task_cnt;
@@ -316,7 +323,7 @@
return (void *)0xdeadbeef;
}
-int p_mpi_hook_client_single_task_per_node()
+int p_mpi_hook_client_single_task_per_node(void)
{
return true;
}
diff --git a/src/plugins/mpi/mpich1_shmem/Makefile.in b/src/plugins/mpi/mpich1_shmem/Makefile.in
index c6ef02e..efcdf62 100644
--- a/src/plugins/mpi/mpich1_shmem/Makefile.in
+++ b/src/plugins/mpi/mpich1_shmem/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
mpi_mpich1_shmem_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -402,7 +410,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-mpi_mpich1_shmem.la: $(mpi_mpich1_shmem_la_OBJECTS) $(mpi_mpich1_shmem_la_DEPENDENCIES)
+mpi_mpich1_shmem.la: $(mpi_mpich1_shmem_la_OBJECTS) $(mpi_mpich1_shmem_la_DEPENDENCIES) $(EXTRA_mpi_mpich1_shmem_la_DEPENDENCIES)
$(mpi_mpich1_shmem_la_LINK) -rpath $(pkglibdir) $(mpi_mpich1_shmem_la_OBJECTS) $(mpi_mpich1_shmem_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -539,10 +547,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c b/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c
index bc1e2f8..d2626df 100644
--- a/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c
+++ b/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c
@@ -48,6 +48,7 @@
#include "slurm/slurm_errno.h"
#include "src/common/slurm_xlator.h"
#include "src/plugins/mpi/lam/lam.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
/*
* These variables are required by the generic plugin interface. If they
@@ -80,6 +81,12 @@
const char plugin_type[] = "mpi/mpich1_shmem";
const uint32_t plugin_version = 100;
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+ debug("mpi/mpich1_shmem: slurmstepd prefork");
+ return SLURM_SUCCESS;
+}
+
int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
char ***env)
{
@@ -96,7 +103,7 @@
return (void *)0xdeadbeef;
}
-int p_mpi_hook_client_single_task_per_node()
+int p_mpi_hook_client_single_task_per_node(void)
{
return true;
}
diff --git a/src/plugins/mpi/mpichgm/Makefile.in b/src/plugins/mpi/mpichgm/Makefile.in
index 6a638aa..9246948 100644
--- a/src/plugins/mpi/mpichgm/Makefile.in
+++ b/src/plugins/mpi/mpichgm/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
mpi_mpichgm_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -406,7 +414,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-mpi_mpichgm.la: $(mpi_mpichgm_la_OBJECTS) $(mpi_mpichgm_la_DEPENDENCIES)
+mpi_mpichgm.la: $(mpi_mpichgm_la_OBJECTS) $(mpi_mpichgm_la_DEPENDENCIES) $(EXTRA_mpi_mpichgm_la_DEPENDENCIES)
$(mpi_mpichgm_la_LINK) -rpath $(pkglibdir) $(mpi_mpichgm_la_OBJECTS) $(mpi_mpichgm_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -544,10 +552,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/mpichgm/mpi_mpichgm.c b/src/plugins/mpi/mpichgm/mpi_mpichgm.c
index 422718a..b0f7734 100644
--- a/src/plugins/mpi/mpichgm/mpi_mpichgm.c
+++ b/src/plugins/mpi/mpichgm/mpi_mpichgm.c
@@ -49,6 +49,7 @@
#include "slurm/slurm_errno.h"
#include "src/common/slurm_xlator.h"
#include "src/plugins/mpi/mpichgm/mpichgm.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
/*
* These variables are required by the generic plugin interface. If they
@@ -81,6 +82,12 @@
const char plugin_type[] = "mpi/mpichgm";
const uint32_t plugin_version = 100;
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+ debug("mpi/mpichgm: slurmstepd prefork");
+ return SLURM_SUCCESS;
+}
+
int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
char ***env)
{
@@ -119,7 +126,7 @@
return (mpi_plugin_client_state_t *)gmpi_thr_create(job, env);
}
-int p_mpi_hook_client_single_task_per_node()
+int p_mpi_hook_client_single_task_per_node(void)
{
return false;
}
diff --git a/src/plugins/mpi/mpichmx/Makefile.in b/src/plugins/mpi/mpichmx/Makefile.in
index 0700410..c147983 100644
--- a/src/plugins/mpi/mpichmx/Makefile.in
+++ b/src/plugins/mpi/mpichmx/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
mpi_mpichmx_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -406,7 +414,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-mpi_mpichmx.la: $(mpi_mpichmx_la_OBJECTS) $(mpi_mpichmx_la_DEPENDENCIES)
+mpi_mpichmx.la: $(mpi_mpichmx_la_OBJECTS) $(mpi_mpichmx_la_DEPENDENCIES) $(EXTRA_mpi_mpichmx_la_DEPENDENCIES)
$(mpi_mpichmx_la_LINK) -rpath $(pkglibdir) $(mpi_mpichmx_la_OBJECTS) $(mpi_mpichmx_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -544,10 +552,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/mpichmx/mpi_mpichmx.c b/src/plugins/mpi/mpichmx/mpi_mpichmx.c
index f13b698..2df42bf 100644
--- a/src/plugins/mpi/mpichmx/mpi_mpichmx.c
+++ b/src/plugins/mpi/mpichmx/mpi_mpichmx.c
@@ -48,6 +48,7 @@
#include "slurm/slurm_errno.h"
#include "src/common/slurm_xlator.h"
#include "src/plugins/mpi/mpichmx/mpichmx.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
/*
* These variables are required by the generic plugin interface. If they
@@ -80,6 +81,12 @@
const char plugin_type[] = "mpi/mpichmx";
const uint32_t plugin_version = 100;
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+ debug("mpi/mpichmx: slurmstepd prefork");
+ return SLURM_SUCCESS;
+}
+
int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
char ***env)
{
@@ -118,7 +125,7 @@
return (mpi_plugin_client_state_t *)gmpi_thr_create(job, env);
}
-int p_mpi_hook_client_single_task_per_node()
+int p_mpi_hook_client_single_task_per_node(void)
{
return false;
}
diff --git a/src/plugins/mpi/mvapich/Makefile.in b/src/plugins/mpi/mvapich/Makefile.in
index 7d0961c..6bb9fab 100644
--- a/src/plugins/mpi/mvapich/Makefile.in
+++ b/src/plugins/mpi/mvapich/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
mpi_mvapich_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-mpi_mvapich.la: $(mpi_mvapich_la_OBJECTS) $(mpi_mvapich_la_DEPENDENCIES)
+mpi_mvapich.la: $(mpi_mvapich_la_OBJECTS) $(mpi_mvapich_la_DEPENDENCIES) $(EXTRA_mpi_mvapich_la_DEPENDENCIES)
$(mpi_mvapich_la_LINK) -rpath $(pkglibdir) $(mpi_mvapich_la_OBJECTS) $(mpi_mvapich_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -542,10 +550,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/mvapich/mpi_mvapich.c b/src/plugins/mpi/mvapich/mpi_mvapich.c
index 0b33ead..b6125d6 100644
--- a/src/plugins/mpi/mvapich/mpi_mvapich.c
+++ b/src/plugins/mpi/mvapich/mpi_mvapich.c
@@ -49,6 +49,7 @@
#include "slurm/slurm_errno.h"
#include "src/common/slurm_xlator.h"
#include "src/plugins/mpi/mvapich/mvapich.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
/*
* These variables are required by the generic plugin interface. If they
@@ -81,6 +82,12 @@
const char plugin_type[] = "mpi/mvapich";
const uint32_t plugin_version = 100;
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+ debug("mpi/mvapich: slurmstepd prefork");
+ return SLURM_SUCCESS;
+}
+
int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job,
char ***env)
{
@@ -127,7 +134,7 @@
return (mpi_plugin_client_state_t *)mvapich_thr_create(job, env);
}
-int p_mpi_hook_client_single_task_per_node()
+int p_mpi_hook_client_single_task_per_node(void)
{
return false;
}
diff --git a/src/plugins/mpi/none/Makefile.in b/src/plugins/mpi/none/Makefile.in
index ee88db3..0ff92f9 100644
--- a/src/plugins/mpi/none/Makefile.in
+++ b/src/plugins/mpi/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
mpi_none_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-mpi_none.la: $(mpi_none_la_OBJECTS) $(mpi_none_la_DEPENDENCIES)
+mpi_none.la: $(mpi_none_la_OBJECTS) $(mpi_none_la_DEPENDENCIES) $(EXTRA_mpi_none_la_DEPENDENCIES)
$(mpi_none_la_LINK) -rpath $(pkglibdir) $(mpi_none_la_OBJECTS) $(mpi_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/none/mpi_none.c b/src/plugins/mpi/none/mpi_none.c
index 514d237..c73b1de 100644
--- a/src/plugins/mpi/none/mpi_none.c
+++ b/src/plugins/mpi/none/mpi_none.c
@@ -50,6 +50,7 @@
#include "src/common/slurm_xlator.h"
#include "src/common/mpi.h"
#include "src/common/env.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
/*
* These variables are required by the generic plugin interface. If they
@@ -82,6 +83,12 @@
const char plugin_type[] = "mpi/none";
const uint32_t plugin_version = 100;
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+ debug("mpi/none: slurmstepd prefork");
+ return SLURM_SUCCESS;
+}
+
int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t*job,
char ***env)
{
@@ -97,7 +104,7 @@
return (void *)0xdeadbeef;
}
-int p_mpi_hook_client_single_task_per_node()
+int p_mpi_hook_client_single_task_per_node(void)
{
return false;
}
diff --git a/src/plugins/mpi/openmpi/Makefile.in b/src/plugins/mpi/openmpi/Makefile.in
index 069bbca..6cb9951 100644
--- a/src/plugins/mpi/openmpi/Makefile.in
+++ b/src/plugins/mpi/openmpi/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
mpi_openmpi_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-mpi_openmpi.la: $(mpi_openmpi_la_OBJECTS) $(mpi_openmpi_la_DEPENDENCIES)
+mpi_openmpi.la: $(mpi_openmpi_la_OBJECTS) $(mpi_openmpi_la_DEPENDENCIES) $(EXTRA_mpi_openmpi_la_DEPENDENCIES)
$(mpi_openmpi_la_LINK) -rpath $(pkglibdir) $(mpi_openmpi_la_OBJECTS) $(mpi_openmpi_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/mpi/openmpi/mpi_openmpi.c b/src/plugins/mpi/openmpi/mpi_openmpi.c
index c7e9ea5..579633a 100644
--- a/src/plugins/mpi/openmpi/mpi_openmpi.c
+++ b/src/plugins/mpi/openmpi/mpi_openmpi.c
@@ -50,6 +50,7 @@
#include "src/common/slurm_xlator.h"
#include "src/common/mpi.h"
#include "src/common/env.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
/*
* These variables are required by the generic plugin interface. If they
@@ -82,6 +83,12 @@
const char plugin_type[] = "mpi/openmpi";
const uint32_t plugin_version = 100;
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+ debug("mpi/openmpi: slurmstepd prefork");
+ return SLURM_SUCCESS;
+}
+
int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
char ***env)
{
@@ -97,7 +104,7 @@
return (void *)0xdeadbeef;
}
-int p_mpi_hook_client_single_task_per_node()
+int p_mpi_hook_client_single_task_per_node(void)
{
return false;
}
diff --git a/src/plugins/mpi/pmi2/Makefile.am b/src/plugins/mpi/pmi2/Makefile.am
new file mode 100644
index 0000000..30663b4
--- /dev/null
+++ b/src/plugins/mpi/pmi2/Makefile.am
@@ -0,0 +1,29 @@
+# Makefile for mpi/pmi2 plugin
+
+AUTOMAKE_OPTIONS = foreign
+
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+
+pkglib_LTLIBRARIES = mpi_pmi2.la
+
+mpi_pmi2_la_SOURCES = mpi_pmi2.c \
+ agent.c agent.h \
+ client.c client.h \
+ kvs.c kvs.h \
+ info.c info.h \
+ pmi1.c pmi2.c pmi.h \
+ setup.c setup.h \
+ spawn.c spawn.h \
+ tree.c tree.h \
+ $(top_srcdir)/src/common/mpi.h \
+ $(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c \
+ $(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.h
+
+mpi_pmi2_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+
+force:
+
+$(mpi_pmi2_LDADD) : force
+ @cd `dirname $@` && $(MAKE) `basename $@`
diff --git a/src/plugins/mpi/pmi2/Makefile.in b/src/plugins/mpi/pmi2/Makefile.in
new file mode 100644
index 0000000..aae662b
--- /dev/null
+++ b/src/plugins/mpi/pmi2/Makefile.in
@@ -0,0 +1,695 @@
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Makefile for mpi/pmi2 plugin
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = src/plugins/mpi/pmi2
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
+ $(top_srcdir)/auxdir/libtool.m4 \
+ $(top_srcdir)/auxdir/ltoptions.m4 \
+ $(top_srcdir)/auxdir/ltsugar.m4 \
+ $(top_srcdir)/auxdir/ltversion.m4 \
+ $(top_srcdir)/auxdir/lt~obsolete.m4 \
+ $(top_srcdir)/auxdir/slurm.m4 \
+ $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \
+ $(top_srcdir)/auxdir/x_ac_affinity.m4 \
+ $(top_srcdir)/auxdir/x_ac_aix.m4 \
+ $(top_srcdir)/auxdir/x_ac_blcr.m4 \
+ $(top_srcdir)/auxdir/x_ac_bluegene.m4 \
+ $(top_srcdir)/auxdir/x_ac_cflags.m4 \
+ $(top_srcdir)/auxdir/x_ac_cray.m4 \
+ $(top_srcdir)/auxdir/x_ac_databases.m4 \
+ $(top_srcdir)/auxdir/x_ac_debug.m4 \
+ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \
+ $(top_srcdir)/auxdir/x_ac_elan.m4 \
+ $(top_srcdir)/auxdir/x_ac_env.m4 \
+ $(top_srcdir)/auxdir/x_ac_federation.m4 \
+ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \
+ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \
+ $(top_srcdir)/auxdir/x_ac_iso.m4 \
+ $(top_srcdir)/auxdir/x_ac_lua.m4 \
+ $(top_srcdir)/auxdir/x_ac_man2html.m4 \
+ $(top_srcdir)/auxdir/x_ac_munge.m4 \
+ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \
+ $(top_srcdir)/auxdir/x_ac_pam.m4 \
+ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \
+ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \
+ $(top_srcdir)/auxdir/x_ac_readline.m4 \
+ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \
+ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
+ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \
+ $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \
+ $(top_srcdir)/auxdir/x_ac_srun.m4 \
+ $(top_srcdir)/auxdir/x_ac_sun_const.m4 \
+ $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(pkglibdir)"
+LTLIBRARIES = $(pkglib_LTLIBRARIES)
+mpi_pmi2_la_LIBADD =
+am_mpi_pmi2_la_OBJECTS = mpi_pmi2.lo agent.lo client.lo kvs.lo info.lo \
+ pmi1.lo pmi2.lo setup.lo spawn.lo tree.lo reverse_tree_math.lo
+mpi_pmi2_la_OBJECTS = $(am_mpi_pmi2_la_OBJECTS)
+mpi_pmi2_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(mpi_pmi2_la_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm
+depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(mpi_pmi2_la_SOURCES)
+DIST_SOURCES = $(mpi_pmi2_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTHD_CFLAGS = @AUTHD_CFLAGS@
+AUTHD_LIBS = @AUTHD_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BGL_LOADED = @BGL_LOADED@
+BGQ_LOADED = @BGQ_LOADED@
+BG_INCLUDES = @BG_INCLUDES@
+BG_LDFLAGS = @BG_LDFLAGS@
+BG_L_P_LOADED = @BG_L_P_LOADED@
+BLCR_CPPFLAGS = @BLCR_CPPFLAGS@
+BLCR_HOME = @BLCR_HOME@
+BLCR_LDFLAGS = @BLCR_LDFLAGS@
+BLCR_LIBS = @BLCR_LIBS@
+BLUEGENE_LOADED = @BLUEGENE_LOADED@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CMD_LDFLAGS = @CMD_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DL_LIBS = @DL_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ELAN_LIBS = @ELAN_LIBS@
+EXEEXT = @EXEEXT@
+FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@
+FGREP = @FGREP@
+GREP = @GREP@
+GTK_CFLAGS = @GTK_CFLAGS@
+GTK_LIBS = @GTK_LIBS@
+HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@
+HAVEPGCONFIG = @HAVEPGCONFIG@
+HAVE_AIX = @HAVE_AIX@
+HAVE_ELAN = @HAVE_ELAN@
+HAVE_FEDERATION = @HAVE_FEDERATION@
+HAVE_MAN2HTML = @HAVE_MAN2HTML@
+HAVE_OPENSSL = @HAVE_OPENSSL@
+HAVE_SOME_CURSES = @HAVE_SOME_CURSES@
+HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@
+HWLOC_LDFLAGS = @HWLOC_LDFLAGS@
+HWLOC_LIBS = @HWLOC_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_LDFLAGS = @LIB_LDFLAGS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
+MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
+MUNGE_LIBS = @MUNGE_LIBS@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_LIBS = @MYSQL_LIBS@
+NCURSES = @NCURSES@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NUMA_LIBS = @NUMA_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PAM_DIR = @PAM_DIR@
+PAM_LIBS = @PAM_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PROCTRACKDIR = @PROCTRACKDIR@
+PROJECT = @PROJECT@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+READLINE_LIBS = @READLINE_LIBS@
+REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
+RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
+SED = @SED@
+SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SLURMCTLD_PORT = @SLURMCTLD_PORT@
+SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@
+SLURMDBD_PORT = @SLURMDBD_PORT@
+SLURMD_PORT = @SLURMD_PORT@
+SLURM_API_AGE = @SLURM_API_AGE@
+SLURM_API_CURRENT = @SLURM_API_CURRENT@
+SLURM_API_MAJOR = @SLURM_API_MAJOR@
+SLURM_API_REVISION = @SLURM_API_REVISION@
+SLURM_API_VERSION = @SLURM_API_VERSION@
+SLURM_MAJOR = @SLURM_MAJOR@
+SLURM_MICRO = @SLURM_MICRO@
+SLURM_MINOR = @SLURM_MINOR@
+SLURM_PREFIX = @SLURM_PREFIX@
+SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@
+SLURM_VERSION_STRING = @SLURM_VERSION_STRING@
+SO_LDFLAGS = @SO_LDFLAGS@
+SSL_CPPFLAGS = @SSL_CPPFLAGS@
+SSL_LDFLAGS = @SSL_LDFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+UTIL_LIBS = @UTIL_LIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+ac_have_man2html = @ac_have_man2html@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lua_CFLAGS = @lua_CFLAGS@
+lua_LIBS = @lua_LIBS@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AUTOMAKE_OPTIONS = foreign
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+pkglib_LTLIBRARIES = mpi_pmi2.la
+mpi_pmi2_la_SOURCES = mpi_pmi2.c \
+ agent.c agent.h \
+ client.c client.h \
+ kvs.c kvs.h \
+ info.c info.h \
+ pmi1.c pmi2.c pmi.h \
+ setup.c setup.h \
+ spawn.c spawn.h \
+ tree.c tree.h \
+ $(top_srcdir)/src/common/mpi.h \
+ $(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c \
+ $(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.h
+
+mpi_pmi2_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/mpi/pmi2/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/plugins/mpi/pmi2/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(pkglibdir)" || $(MKDIR_P) "$(DESTDIR)$(pkglibdir)"
+ @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \
+ }
+
+uninstall-pkglibLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \
+ done
+
+clean-pkglibLTLIBRARIES:
+ -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES)
+ @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+mpi_pmi2.la: $(mpi_pmi2_la_OBJECTS) $(mpi_pmi2_la_DEPENDENCIES) $(EXTRA_mpi_pmi2_la_DEPENDENCIES)
+ $(mpi_pmi2_la_LINK) -rpath $(pkglibdir) $(mpi_pmi2_la_OBJECTS) $(mpi_pmi2_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/agent.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/client.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/info.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kvs.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_pmi2.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pmi1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pmi2.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reverse_tree_math.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/setup.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spawn.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tree.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+
+reverse_tree_math.lo: $(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT reverse_tree_math.lo -MD -MP -MF $(DEPDIR)/reverse_tree_math.Tpo -c -o reverse_tree_math.lo `test -f '$(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c' || echo '$(srcdir)/'`$(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/reverse_tree_math.Tpo $(DEPDIR)/reverse_tree_math.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c' object='reverse_tree_math.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o reverse_tree_math.lo `test -f '$(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c' || echo '$(srcdir)/'`$(top_srcdir)/src/slurmd/slurmd/reverse_tree_math.c
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(pkglibdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-pkglibLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-pkglibLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-pkglibLTLIBRARIES ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-pkglibLTLIBRARIES \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES
+
+
+force:
+
+$(mpi_pmi2_LDADD) : force
+ @cd `dirname $@` && $(MAKE) `basename $@`
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/plugins/mpi/pmi2/agent.c b/src/plugins/mpi/pmi2/agent.c
new file mode 100644
index 0000000..11b5ea1
--- /dev/null
+++ b/src/plugins/mpi/pmi2/agent.c
@@ -0,0 +1,367 @@
+/*****************************************************************************\
+ ** agent.c - PMI2 handling thread
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <arpa/inet.h>
+#include <poll.h>
+
+#include "src/common/slurm_xlator.h"
+#include "src/common/eio.h"
+#include "src/common/mpi.h"
+#include "src/common/xstring.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+
+#include "client.h"
+#include "pmi.h"
+#include "setup.h"
+
+#define MAX_RETRIES 5
+
+static int *initialized = NULL;
+static int *finalized = NULL;
+
+static pthread_t pmi2_agent_tid = 0;
+
+static bool _tree_listen_readable(eio_obj_t *obj);
+static int _tree_listen_read(eio_obj_t *obj, List objs);
+static struct io_operations tree_listen_ops = {
+readable: &_tree_listen_readable,
+handle_read: &_tree_listen_read,
+};
+
+static bool _task_readable(eio_obj_t *obj);
+static int _task_read(eio_obj_t *obj, List objs);
+/* static bool _task_writable(eio_obj_t *obj); */
+/* static int _task_write(eio_obj_t *obj, List objs); */
+static struct io_operations task_ops = {
+readable: &_task_readable,
+handle_read: &_task_read,
+};
+
+
+static int _handle_pmi1_init(int fd, int lrank);
+
+/*********************************************************************/
+
+static int
+_handle_task_request(int fd, int lrank)
+{
+ int rc = SLURM_SUCCESS;
+
+ debug3("mpi/pmi2: in _handle_task_request, lrank=%d", lrank);
+
+ if (initialized[lrank] == 0) {
+ rc = _handle_pmi1_init(fd, lrank);
+ initialized[lrank] = 1;
+ } else if (is_pmi11()) {
+ rc = handle_pmi1_cmd(fd, lrank);
+ } else if (is_pmi20()) {
+ rc = handle_pmi2_cmd(fd, lrank);
+ } else {
+ fatal("this is impossible");
+ }
+ return rc;
+}
+
+static int
+_handle_tree_request(int fd)
+{
+ uint32_t temp;
+ int rc = SLURM_SUCCESS;
+
+ if (in_stepd()) { /* skip uid passed from slurmd */
+ safe_read(fd, &temp, sizeof(uint32_t));
+ temp = ntohl(temp);
+ debug3("mpi/pmi2: _handle_tree_request: req from uid %u", temp);
+ }
+ rc = handle_tree_cmd(fd);
+ return rc;
+rwfail:
+ return SLURM_ERROR;
+}
+
+/*********************************************************************/
+
+static bool
+_is_fd_ready(int fd)
+{
+ struct pollfd pfd[1];
+ int rc;
+
+ pfd[0].fd = fd;
+ pfd[0].events = POLLIN;
+
+ rc = poll(pfd, 1, 10);
+
+ return ((rc == 1) && (pfd[0].revents & POLLIN));
+}
+
+static bool
+_tree_listen_readable(eio_obj_t *obj)
+{
+ debug2("mpi/pmi2: _tree_listen_readable");
+ if (obj->shutdown == true) {
+ if (obj->fd != -1) {
+ close(obj->fd);
+ obj->fd = -1;
+ }
+ debug2(" false, shutdown");
+ return false;
+ }
+ return true;
+}
+
+static int
+_tree_listen_read(eio_obj_t *obj, List objs)
+{
+ int sd;
+ struct sockaddr addr;
+ struct sockaddr_in *sin;
+ socklen_t size = sizeof(addr);
+ char buf[INET_ADDRSTRLEN];
+
+ debug2("mpi/pmi2: _tree_listen_read");
+
+ while (1) {
+ /*
+ * Return early if fd is not now ready
+ */
+ if (!_is_fd_ready(obj->fd))
+ return 0;
+
+ while ((sd = accept(obj->fd, &addr, &size)) < 0) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN) /* No more connections */
+ return 0;
+ if ((errno == ECONNABORTED) ||
+ (errno == EWOULDBLOCK)) {
+ return 0;
+ }
+ error("mpi/pmi2: unable to accept new connection: %m");
+ return 0;
+ }
+
+ if (! in_stepd()) {
+ sin = (struct sockaddr_in *) &addr;
+ inet_ntop(AF_INET, &sin->sin_addr, buf, INET_ADDRSTRLEN);
+ debug3("mpi/pmi2: accepted tree connection: ip=%s sd=%d",
+ buf, sd);
+ }
+
+ /* read command from socket and handle it */
+ _handle_tree_request(sd);
+ close(sd);
+ }
+ return 0;
+}
+
+/*********************************************************************/
+
+static bool
+_task_readable(eio_obj_t *obj)
+{
+ int lrank;
+
+ debug2("mpi/pmi2: _task_readable");
+
+ lrank = (int)(long)(obj->arg);
+ if (finalized[lrank] == 1) {
+ debug2(" false, finalized");
+ return false;
+ }
+
+ if (obj->shutdown == true) {
+ if (obj->fd != -1) {
+ close(obj->fd);
+ obj->fd = -1;
+ }
+ debug2(" false, shutdown");
+ return false;
+ }
+ return true;
+}
+
+static int
+_task_read(eio_obj_t *obj, List objs)
+{
+ int rc, lrank;
+
+ lrank = (int)(long)(obj->arg);
+ rc = _handle_task_request(obj->fd, lrank);
+
+ return rc;
+}
+
+/*********************************************************************/
+
+/* the PMI1 init */
+static int
+_handle_pmi1_init(int fd, int lrank)
+{
+ char buf[64];
+ int version, subversion;
+ int n, rc = 0;
+
+ debug3("mpi/pmi2: in _handle_pmi1_init");
+
+ while ( (n = read(fd, buf, 64)) < 0 && errno == EINTR);
+ if (n < 0) {
+ error("mpi/pmi2: failed to read PMI1 init command");
+ return SLURM_ERROR;
+ }
+ buf[n] = '\0';
+
+ n = sscanf(buf, "cmd=init pmi_version=%d pmi_subversion=%d\n",
+ &version, &subversion);
+ if (n != 2) {
+ error("mpi/pmi2: invalid PMI1 init command: `%s'", buf);
+ rc = 1;
+ version = 2;
+ subversion = 0;
+ goto send_response;
+ }
+
+ rc = set_pmi_version(version, subversion);
+ if (rc != SLURM_SUCCESS) {
+ get_pmi_version(&version, &subversion);
+ } else
+ rc = 0;
+
+send_response:
+ snprintf(buf, 64, "cmd=response_to_init rc=%d pmi_version=%d "
+ "pmi_subversion=%d\n", rc, version, subversion);
+
+ while ( (n = write(fd, buf, strlen(buf))) < 0 && errno == EINTR);
+ if (n < 0) {
+ error ("mpi/pmi2: failed to write PMI1 init response");
+ return SLURM_ERROR;
+ }
+
+ debug3("mpi/pmi2: out _handle_pmi1_init");
+ return SLURM_SUCCESS;
+}
+
+/*********************************************************************/
+
+
+/*
+ * main loop of agent thread
+ */
+static void *
+_agent(void * unused)
+{
+ eio_handle_t *pmi2_handle;
+ eio_obj_t *tree_listen_obj, *task_obj;
+ int i;
+
+ pmi2_handle = eio_handle_create();
+
+ //fd_set_nonblocking(tree_sock);
+ tree_listen_obj = eio_obj_create(tree_sock, &tree_listen_ops,
+ (void *)(-1));
+ eio_new_initial_obj(pmi2_handle, tree_listen_obj);
+
+ /* for stepd, add the sockets to tasks */
+ if (in_stepd()) {
+ for (i = 0; i < job_info.ltasks; i ++) {
+ task_obj = eio_obj_create(STEPD_PMI_SOCK(i), &task_ops,
+ (void*)(long)(i));
+ eio_new_initial_obj(pmi2_handle, task_obj);
+ }
+ initialized = xmalloc(job_info.ltasks * sizeof(int));
+ finalized = xmalloc(job_info.ltasks * sizeof(int));
+ }
+
+ eio_handle_mainloop(pmi2_handle);
+
+ debug("mpi/pmi2: agent thread exit");
+
+ eio_handle_destroy(pmi2_handle);
+ return NULL;
+}
+
+/*
+ * start the PMI2 agent thread
+ */
+extern int
+pmi2_start_agent(void)
+{
+ int retries = 0;
+ pthread_attr_t attr;
+
+ pthread_attr_init(&attr);
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+ while ((errno = pthread_create(&pmi2_agent_tid, &attr,
+ &_agent, NULL))) {
+ if (++retries > MAX_RETRIES) {
+ error ("mpi/pmi2: pthread_create error %m");
+ slurm_attr_destroy(&attr);
+ return SLURM_ERROR;
+ }
+ sleep(1);
+ }
+ slurm_attr_destroy(&attr);
+ debug("mpi/pmi2: started agent thread (%lu)",
+ (unsigned long) pmi2_agent_tid);
+
+ return SLURM_SUCCESS;
+}
+
+/*
+ * stop the PMI2 agent thread
+ */
+extern int
+pmi2_stop_agent(void)
+{
+ if (pmi2_agent_tid)
+ pthread_cancel(pmi2_agent_tid);
+ return SLURM_SUCCESS;
+}
+
+extern void
+task_finalize(int lrank)
+{
+ finalized[lrank] = 1;
+}
diff --git a/src/plugins/mpi/pmi2/agent.h b/src/plugins/mpi/pmi2/agent.h
new file mode 100644
index 0000000..67dc2ff
--- /dev/null
+++ b/src/plugins/mpi/pmi2/agent.h
@@ -0,0 +1,47 @@
+/*****************************************************************************\
+ ** agent.h - PMI2 handling thread
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifndef _AGENT_H
+#define _AGENT_H
+
+
+extern int pmi2_start_agent(void);
+extern int pmi2_stop_agent(void);
+
+extern void task_finalize(int lrank);
+
+#endif /* _AGENT_H */
diff --git a/src/plugins/mpi/pmi2/client.c b/src/plugins/mpi/pmi2/client.c
new file mode 100644
index 0000000..b19a09b
--- /dev/null
+++ b/src/plugins/mpi/pmi2/client.c
@@ -0,0 +1,566 @@
+/*****************************************************************************\
+ ** client.c - PMI2 client wire protocol message handling
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "src/common/slurm_xlator.h"
+#include "src/common/xmalloc.h"
+#include "src/common/xstring.h"
+
+#include "client.h"
+#include "pmi.h"
+
+#define KEY_INDEX(i) (i * 2)
+#define VAL_INDEX(i) (i * 2 + 1)
+#define MP_KEY(msg, index) (msg->pairs[KEY_INDEX(index)])
+#define MP_VAL(req, index) (req->pairs[VAL_INDEX(index)])
+
+#define REQ_PAIR_SIZE_INC 32
+
+static int pmi_version = 0;
+static int pmi_subversion = 0;
+
+extern int
+is_pmi11(void)
+{
+ return (pmi_version == PMI11_VERSION &&
+ pmi_subversion == PMI11_SUBVERSION);
+}
+
+extern int
+is_pmi20(void)
+{
+ return (pmi_version == PMI20_VERSION &&
+ pmi_subversion == PMI20_SUBVERSION);
+}
+
+extern int
+get_pmi_version(int *version, int *subversion)
+{
+ if (pmi_version) {
+ *version = pmi_version;
+ *subversion = pmi_subversion;
+ return SLURM_SUCCESS;
+ } else
+ return SLURM_ERROR;
+}
+
+extern int
+set_pmi_version(int version, int subversion)
+{
+ if ( (version == PMI11_VERSION && subversion == PMI11_SUBVERSION) ||
+ (version == PMI20_VERSION && subversion == PMI20_SUBVERSION) ) {
+
+ if (pmi_version && (pmi_version != version ||
+ pmi_subversion != subversion)) {
+ error("mpi/pmi2: inconsistent client PMI version: "
+ "%d.%d(req) <> %d.%d(orig)", version, subversion,
+ pmi_version, pmi_subversion);
+ return SLURM_ERROR;
+ } else if (! pmi_version) {
+ verbose("mpi/pmi2: got client PMI1 init, version=%d.%d",
+ version, subversion);
+ pmi_version = version;
+ pmi_subversion = subversion;
+ }
+ } else {
+ error("mpi/pmi2: unsupported PMI version: %d.%d", version,
+ subversion);
+ return SLURM_ERROR;
+ }
+ return SLURM_SUCCESS;
+}
+
+static int
+_parse_cmd(client_req_t *req)
+{
+ int i = 0, len = 0;
+
+ len = strlen (MCMD_KEY"=");
+ if (! strncmp(req->buf, MCMD_KEY"=", len)) {
+ req->cmd = MCMD_KEY; /* XXX: mcmd=spawn */
+ req->sep = '\n';
+ req->term = '\n';
+ return SLURM_SUCCESS;
+ }
+
+ len = strlen (CMD_KEY"=");
+ if (strncmp(req->buf, CMD_KEY"=", len)) {
+ error("mpi/pmi2: request not begin with '" CMD_KEY "='");
+ error("mpi/pmi2: full request is: %s", req->buf);
+ return SLURM_ERROR;
+ }
+ req->cmd = &req->buf[len];
+
+ i = len;
+ if (is_pmi11()) {
+ req->sep = ' ';
+ req->term = '\n';
+ while (req->buf[i] != req->sep &&
+ req->buf[i] != req->term &&
+ i < req->buf_len) {
+ i ++;
+ }
+ } else if (is_pmi20()) {
+ req->sep = ';';
+ req->term = ';';
+ while (req->buf[i] != req->sep &&
+ req->buf[i] != req->term &&
+ i < req->buf_len) {
+ i ++;
+ }
+ }
+ if (i >= req->buf_len) {
+ error ("mpi/pmi2: cmd not properly terminated in client request");
+ return SLURM_ERROR;
+ }
+ req->buf[i] = '\0'; /* make it nul terminated */
+
+ req->parse_idx = i + 1;
+
+ /* TODO: concat processing */
+
+ return SLURM_SUCCESS;
+}
+
+
+extern client_req_t *
+client_req_init(uint32_t len, char *buf)
+{
+ client_req_t *req = NULL;
+
+ /* buf always '\0' terminated */
+ req = xmalloc(sizeof(client_req_t));
+ req->buf = buf;
+ req->buf_len = len;
+ req->parse_idx = 0;
+
+ if (_parse_cmd(req) != SLURM_SUCCESS) {
+ req = NULL;
+ }
+
+ return req;
+}
+
+extern void
+client_req_free(client_req_t *req)
+{
+ if (req) {
+ xfree(req->buf);
+ xfree(req);
+ }
+}
+
+
+/*
+ * No escape of ';' supported for now, hence no ';' in value.
+ * TODO: concat command processing
+ */
+extern int
+client_req_parse_body(client_req_t *req)
+{
+ int i = 0, rc = SLURM_SUCCESS;
+ char *key, *val;
+
+ /* skip cmd */
+ i = req->parse_idx;
+
+ while (i < req->buf_len) {
+ /* search for key */
+ key = &req->buf[i];
+ while (req->buf[i] != '=' && i < req->buf_len) {
+ i ++;
+ }
+ if (i >= req->buf_len) {
+ error("mpi/pmi2: no value for key %s in req", key);
+ rc = SLURM_ERROR;
+ break;
+ }
+ req->buf[i] = '\0'; /* make it nul terminated */
+ i ++;
+ debug3("mpi/pmi2: client req key %s", key);
+
+ /* search for val */
+ val = &req->buf[i];
+ while (req->buf[i] != req->sep &&
+ req->buf[i] != req->term &&
+ i < req->buf_len) {
+ i ++;
+ }
+ if (i >= req->buf_len) {
+ error("mpi/pmi2: value not properly terminated in "
+ "client request");
+ rc = SLURM_ERROR;
+ break;
+ }
+ req->buf[i] = '\0'; /* make it nul terminated */
+ i ++;
+ debug3("mpi/pmi2: client req val %s", val);
+ /*
+ * append pair.
+ * there may be duplicate keys in the pairs, such as in the
+ * spawn cmd. Hence the order of the pairs is of significance.
+ */
+ if (2 * (req->pairs_cnt + 2) > req->pairs_size) {
+ req->pairs_size += REQ_PAIR_SIZE_INC;
+ xrealloc(req->pairs, req->pairs_size * sizeof(char *));
+ }
+ req->pairs[KEY_INDEX(req->pairs_cnt)] = key;
+ req->pairs[VAL_INDEX(req->pairs_cnt)] = val;
+ req->pairs_cnt ++;
+ }
+ /* add a pair of NULL at the end, without increasing req->pairs_cnt */
+ req->pairs[KEY_INDEX(req->pairs_cnt)] = NULL;
+ req->pairs[VAL_INDEX(req->pairs_cnt)] = NULL;
+
+ return rc;
+}
+
+extern spawn_req_t *
+client_req_parse_spawn_req(client_req_t *req)
+{
+ spawn_req_t *spawn_req = NULL;
+ spawn_subcmd_t *subcmd = NULL;
+ int i = 0, j = 0, pi = 0;
+
+ /* req body already parsed */
+ pi = 0;
+
+ if (req->pairs_cnt - pi < 5) {
+ /* NCMDS, PREPUTCOUNT, SUBCMD, MAXPROCS, ARGC */
+ error("mpi/pmi2: wrong number of key-val pairs in spawn cmd");
+ return NULL;
+ }
+
+ spawn_req = spawn_req_new();
+
+ /* ncmds */
+ if (strcmp(MP_KEY(req, pi), NCMDS_KEY)) {
+ error("mpi/pmi2: '" NCMDS_KEY "' expected in spawn cmd");
+ goto req_err;
+ }
+ spawn_req->subcmd_cnt = atoi(MP_VAL(req, pi));
+ spawn_req->subcmds = xmalloc(spawn_req->subcmd_cnt *
+ sizeof(spawn_subcmd_t *));
+ pi ++;
+ /* preputcount */
+ if (strcmp(MP_KEY(req, pi), PREPUTCOUNT_KEY)) {
+ error("mpi/pmi2: '" PREPUTCOUNT_KEY "' expected in spawn cmd");
+ goto req_err;
+ }
+ spawn_req->preput_cnt = atoi(MP_VAL(req, pi));
+ pi ++;
+ if (req->pairs_cnt - pi <
+ ( (2 * spawn_req->preput_cnt) + (3 * spawn_req->subcmd_cnt) )) {
+ /* <PPKEY, PPVAL>, <SUBCMD, MAXPROCS, ARGC> */
+ error("mpi/pmi2: wrong number of key-val pairs in spawn cmd");
+ goto req_err;
+ }
+ spawn_req->pp_keys = xmalloc(spawn_req->preput_cnt * sizeof(char *));
+ spawn_req->pp_vals = xmalloc(spawn_req->preput_cnt * sizeof(char *));
+ /* ppkey,ppval */
+ for (i = 0; i < spawn_req->preput_cnt; i ++) {
+ /* ppkey */
+ if (strncmp(MP_KEY(req, pi), PPKEY_KEY, strlen(PPKEY_KEY)) ||
+ atoi((MP_KEY(req, pi) + strlen(PPKEY_KEY))) != i) {
+ error("mpi/pmi2: '" PPKEY_KEY
+ "%d' expected in spawn cmd", i);
+ goto req_err;
+ }
+ spawn_req->pp_keys[i] = xstrdup(MP_VAL(req, pi));
+ pi ++;
+ /* ppval */
+ if (strncmp(MP_KEY(req, pi), PPVAL_KEY, strlen(PPVAL_KEY)) ||
+ atoi((MP_KEY(req, pi) + strlen(PPVAL_KEY))) != i) {
+ error("mpi/pmi2: '" PPVAL_KEY
+ "%d' expected in spawn cmd", i);
+ goto req_err;
+ }
+ spawn_req->pp_vals[i] = xstrdup(MP_VAL(req, pi));
+ pi ++;
+ }
+ /* subcmd */
+ for (i = 0; i < spawn_req->subcmd_cnt; i ++) {
+ spawn_req->subcmds[i] = spawn_subcmd_new();
+ subcmd = spawn_req->subcmds[i];
+ /* subcmd */
+ if (strcmp(MP_KEY(req, pi), SUBCMD_KEY)) {
+ error("mpi/pmi2: '" SUBCMD_KEY
+ "' expected in spawn cmd");
+ goto req_err;
+ }
+ subcmd->cmd = xstrdup(MP_VAL(req, pi));
+ pi ++;
+ /* maxprocs */
+ if (strcmp(MP_KEY(req, pi), MAXPROCS_KEY)) {
+ error("mpi/pmi2: '" MAXPROCS_KEY
+ "' expected in spawn cmd");
+ goto req_err;
+
+ }
+ subcmd->max_procs = atoi(MP_VAL(req, pi));
+ pi ++;
+ /* argc */
+ if (strcmp(MP_KEY(req, pi), ARGC_KEY)) {
+ error("mpi/pmi2: '" ARGC_KEY
+ "' expected in spawn cmd");
+ goto req_err;
+
+ }
+ subcmd->argc = atoi(MP_VAL(req, pi));
+ pi ++;
+ if (req->pairs_cnt - pi <
+ ( subcmd->argc + (3 * (spawn_req->subcmd_cnt - i - 1))) ) {
+ /* <ARGV>, <SUBCMD, MAXPROCS, ARGC> */
+ error("mpi/pmi2: wrong number of key-val pairs"
+ " in spawn cmd");
+ goto req_err;
+ }
+ debug("mpi/pmi2: argc = %d", subcmd->argc);
+ if (subcmd->argc > 0) {
+ subcmd->argv = xmalloc(subcmd->argc * sizeof(char *));
+ }
+ /* argv */
+ for (j = 0; j < subcmd->argc; j ++) {
+ if (strncmp(MP_KEY(req, pi), ARGV_KEY,
+ strlen(ARGV_KEY)) ||
+ atoi((MP_KEY(req, pi) + strlen(ARGV_KEY))) != j) {
+ error("mpi/pmi2: '" ARGV_KEY
+ "%d' expected in spawn cmd", j);
+ goto req_err;
+ }
+ subcmd->argv[j] = xstrdup(MP_VAL(req, pi));
+ pi ++;
+ }
+ debug("mpi/pmi2: got argv");
+ /* infokeycount, optional */
+ if (pi == req->pairs_cnt) {
+ if (i != spawn_req->subcmd_cnt - 1) {
+ error("mpi/pmi2: wrong number of key-val pairs"
+ "in spawn cmd");
+ goto req_err;
+ }
+ break;
+ } else if (strcmp(MP_KEY(req, pi), INFOKEYCOUNT_KEY)) {
+ subcmd->info_cnt = 0;
+ continue;
+ }
+ subcmd->info_cnt = atoi(MP_VAL(req, pi));
+ pi ++;
+ if (req->pairs_cnt - pi <
+ ( (2 * subcmd->info_cnt) +
+ (3 * (spawn_req->subcmd_cnt - i - 1)) )) {
+ /* <INFOKEY, INFOVAL>, <SUBCMD, MAXPROCS, ARGC> */
+ error("mpi/pmi2: wrong number of key-val pairs"
+ " in spawn cmd");
+ goto req_err;
+ }
+ if (subcmd->info_cnt > 0) {
+ subcmd->info_keys = xmalloc(subcmd->info_cnt *
+ sizeof(char *));
+ subcmd->info_vals = xmalloc(subcmd->info_cnt *
+ sizeof(char *));
+ }
+ /* infokey,infoval */
+ for (j = 0; j < subcmd->info_cnt; j ++) {
+ /* infokey */
+ if (strncmp(MP_KEY(req, pi), INFOKEY_KEY,
+ strlen(INFOKEY_KEY)) ||
+ atoi((MP_KEY(req, pi) +
+ strlen(INFOKEY_KEY))) != j) {
+ error("mpi/pmi2: '" INFOKEY_KEY
+ "%d' expected in spawn cmd", j);
+ goto req_err;
+ }
+ subcmd->info_keys[j] = xstrdup(MP_VAL(req, pi));
+ pi ++;
+ /* infoval */
+ if (strncmp(MP_KEY(req, pi), INFOVAL_KEY,
+ strlen(INFOVAL_KEY)) ||
+ atoi((MP_KEY(req, pi) +
+ strlen(INFOVAL_KEY))) != j) {
+ error("mpi/pmi2: '" INFOVAL_KEY
+ "%d' expected in spawn cmd", j);
+ goto req_err;
+ }
+ subcmd->info_vals[j] = xstrdup(MP_VAL(req, pi));
+ pi ++;
+ }
+ }
+
+ debug("mpi/pmi2: out client_req_parse_spawn");
+ return spawn_req;
+
+req_err:
+ spawn_req_free(spawn_req);
+ return NULL;
+}
+
+extern spawn_subcmd_t *
+client_req_parse_spawn_subcmd(client_req_t *req)
+{
+ spawn_subcmd_t *subcmd = NULL;
+ char buf[PMI2_MAX_KEYLEN];
+ int i = 0;
+
+ subcmd = xmalloc(sizeof(spawn_subcmd_t));
+
+ client_req_get_str(req, EXECNAME_KEY, &subcmd->cmd);
+ client_req_get_int(req, NPROCS_KEY, (int *)&subcmd->max_procs);
+ client_req_get_int(req, ARGCNT_KEY, (int *)&subcmd->argc);
+ subcmd->argv = xmalloc(subcmd->argc * sizeof(char *));
+ for (i = 0; i < subcmd->argc; i ++) {
+ snprintf(buf, PMI2_MAX_KEYLEN, "arg%d", i + 1);
+ client_req_get_str(req, buf, &(subcmd->argv[i]));
+ }
+ client_req_get_int(req, INFONUM_KEY, (int *)&subcmd->info_cnt);
+ subcmd->info_keys = xmalloc(subcmd->info_cnt * sizeof(char *));
+ subcmd->info_vals = xmalloc(subcmd->info_cnt * sizeof(char *));
+ for (i = 0; i < subcmd->info_cnt; i ++) {
+ snprintf(buf, PMI2_MAX_KEYLEN, "info_key_%d", i);
+ client_req_get_str(req, buf, &(subcmd->info_keys[i]));
+ snprintf(buf, PMI2_MAX_KEYLEN, "info_val_%d", i);
+ client_req_get_str(req, buf, &(subcmd->info_vals[i]));
+ }
+ return subcmd;
+}
+
+/************************************************************************/
+
+/* returned value not dup-ed */
+static char *
+_client_req_get_val(client_req_t *req, const char *key)
+{
+ int i;
+
+ for (i = 0; i < req->pairs_cnt; i ++) {
+ if (! strcmp(key, req->pairs[KEY_INDEX(i)]))
+ return req->pairs[VAL_INDEX(i)];
+ }
+ return NULL;
+}
+
+/* return true if found */
+extern bool
+client_req_get_str(client_req_t *req, const char *key, char **pval)
+{
+ char *val;
+
+ val = _client_req_get_val(req, key);
+ if (val == NULL)
+ return false;
+
+ *pval = xstrdup(val);
+ return true;
+}
+
+extern bool
+client_req_get_int(client_req_t *req, const char *key, int *pval)
+{
+ char *val;
+
+ val = _client_req_get_val(req, key);
+ if (val == NULL)
+ return false;
+
+ *pval = atoi(val);
+ return true;
+}
+
+extern bool
+client_req_get_bool(client_req_t *req, const char *key, bool *pval)
+{
+ char *val;
+
+ val = _client_req_get_val(req, key);
+ if (val == NULL)
+ return false;
+
+ if (!strcasecmp(val, TRUE_VAL))
+ *pval = true;
+ else
+ *pval = false;
+ return true;
+}
+
+/* ************************************************************ */
+
+extern client_resp_t *
+client_resp_new(void)
+{
+ client_resp_t *resp;
+
+ resp = xmalloc(sizeof(client_resp_t));
+ return resp;
+}
+
+extern int
+client_resp_send(client_resp_t *resp, int fd)
+{
+ char len_buf[7];
+ int len;
+
+ len = strlen(resp->buf);
+
+ if ( is_pmi20() ) {
+ snprintf(len_buf, 7, "%-6d", len);
+ debug2("mpi/pmi2: client_resp_send: %s%s", len_buf, resp->buf);
+ safe_write(fd, len_buf, 6);
+ } else if ( is_pmi11() ) {
+ debug2("mpi/pmi2: client_resp_send: %s", resp->buf);
+ }
+ safe_write(fd, resp->buf, len);
+
+ return SLURM_SUCCESS;
+
+rwfail:
+ return SLURM_ERROR;
+}
+
+extern void
+client_resp_free(client_resp_t *resp)
+{
+ if (resp) {
+ xfree(resp->buf);
+ xfree(resp);
+ }
+}
+
diff --git a/src/plugins/mpi/pmi2/client.h b/src/plugins/mpi/pmi2/client.h
new file mode 100644
index 0000000..f1d4aa5
--- /dev/null
+++ b/src/plugins/mpi/pmi2/client.h
@@ -0,0 +1,103 @@
+/*****************************************************************************\
+ ** client.h - PMI client wire protocol message handling
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifndef _CLIENT_H
+#define _CLIENT_H
+
+#if HAVE_CONFIG_H
+# include "config.h"
+# if HAVE_INTTYPES_H
+# include <inttypes.h>
+# else
+# if HAVE_STDINT_H
+# include <stdint.h>
+# endif
+# endif /* HAVE_INTTYPES_H */
+#else /* !HAVE_CONFIG_H */
+# include <inttypes.h>
+#endif /* HAVE_CONFIG_H */
+
+#include "spawn.h"
+
+#define PMI11_VERSION 1
+#define PMI11_SUBVERSION 1
+#define PMI20_VERSION 2
+#define PMI20_SUBVERSION 0
+
+typedef struct client_request {
+ int buf_len;
+ char *buf;
+	char sep;                   /* cmd/value separator */
+ char term; /* request terminator */
+ int parse_idx; /* ptr used in parsing */
+ char *cmd; /* cmd points to buf or other static memory*/
+ char **pairs; /* key-value in pairs always point to buf */
+ uint32_t pairs_size;
+ uint32_t pairs_cnt;
+} client_req_t;
+
+typedef struct client_response {
+ char *buf;
+} client_resp_t;
+
+
+extern int get_pmi_version(int *version, int *subversion);
+extern int set_pmi_version(int version, int subversion);
+extern int is_pmi11(void);
+extern int is_pmi20(void);
+
+extern client_req_t *client_req_init(uint32_t len, char *buf);
+extern void client_req_free(client_req_t *req);
+extern int client_req_parse_cmd(client_req_t *req);
+extern int client_req_parse_body(client_req_t *req);
+extern bool client_req_get_str(client_req_t *req, const char *key, char **val);
+extern bool client_req_get_int(client_req_t *req, const char *key, int *val);
+extern bool client_req_get_bool(client_req_t *req, const char *key, bool *val);
+
+extern spawn_req_t *client_req_parse_spawn_req(client_req_t *req);
+extern spawn_subcmd_t *client_req_parse_spawn_subcmd(client_req_t *req);
+
+extern client_resp_t *client_resp_new(void);
+extern int client_resp_send(client_resp_t *req, int fd);
+extern void client_resp_free(client_resp_t *resp);
+/* XXX: this requires CPP */
+#define client_resp_append(msg, fmt, ...) do { \
+ xstrfmtcat(msg->buf, fmt, ## __VA_ARGS__); \
+ } while (0)
+
+
+#endif /* _CLIENT_H */
diff --git a/src/plugins/mpi/pmi2/info.c b/src/plugins/mpi/pmi2/info.c
new file mode 100644
index 0000000..1d9b5e0
--- /dev/null
+++ b/src/plugins/mpi/pmi2/info.c
@@ -0,0 +1,179 @@
+/*****************************************************************************\
+ ** info.c - job/node info related functions
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#include "pmi.h"
+#include "setup.h"
+#include "client.h"
+
+#define NODE_ATTR_SIZE_INC 8
+
+/* pending node attribute get request */
+typedef struct nag_req {
+ int fd;
+ int rank;
+ char key[PMI2_MAX_KEYLEN];
+ struct nag_req *next;
+} nag_req_t;
+static nag_req_t *nag_req_list = NULL;
+
+/* node attributes */
+static int na_cnt = 0;
+static int na_size = 0;
+static char **node_attr = NULL;
+
+#define KEY_INDEX(i) (i * 2)
+#define VAL_INDEX(i) (i * 2 + 1)
+
+
+static void inline
+_free_nag_req(nag_req_t *req)
+{
+ xfree (req);
+}
+
+extern int
+enqueue_nag_req(int fd, int rank, char *key)
+{
+ nag_req_t *req;
+
+ req = xmalloc(sizeof(nag_req_t));
+ req->fd = fd;
+ req->rank = rank;
+ strncpy(req->key, key, PMI2_MAX_KEYLEN);
+
+ /* insert in the head */
+ req->next = nag_req_list;
+ nag_req_list = req;
+ return SLURM_SUCCESS;
+}
+
+extern int
+node_attr_put(char *key, char *val)
+{
+ nag_req_t *req = NULL, **pprev = NULL;
+ client_resp_t *resp = NULL;
+ int rc = SLURM_SUCCESS;
+
+ debug3("mpi/pmi2: node_attr_put: %s=%s", key, val);
+
+ if (na_cnt * 2 >= na_size) {
+ na_size += NODE_ATTR_SIZE_INC;
+ xrealloc(node_attr, na_size * sizeof(char*));
+ }
+ node_attr[KEY_INDEX(na_cnt)] = xstrdup(key);
+ node_attr[VAL_INDEX(na_cnt)] = xstrdup(val);
+ na_cnt ++;
+
+ /* process pending requests */
+ pprev = &nag_req_list;
+ req = *pprev;
+ while (req != NULL) {
+ if (strncmp(key, req->key, PMI2_MAX_KEYLEN)) {
+ pprev = &req->next;
+ req = *pprev;
+ } else {
+ debug("mpi/pmi2: found pending request from rank %d",
+ req->rank);
+
+ /* send response msg */
+ if (! resp) {
+ resp = client_resp_new();
+ client_resp_append(resp,
+ CMD_KEY"="
+ GETNODEATTRRESP_CMD";"
+ RC_KEY"=0;"
+ FOUND_KEY"="TRUE_VAL";"
+ VALUE_KEY"=%s;", val);
+ }
+ rc = client_resp_send(resp, req->fd);
+ if (rc != SLURM_SUCCESS) {
+ error("mpi/pmi2: failed to send '"
+ GETNODEATTRRESP_CMD "' to task %d",
+ req->rank);
+ }
+ /* remove the request */
+ *pprev = req->next;
+ _free_nag_req(req);
+ req = *pprev;
+ }
+ }
+ if (resp) {
+ client_resp_free (resp);
+ }
+ debug3("mpi/pmi2: out node_attr_put");
+ return SLURM_SUCCESS;
+}
+
+/* returned value not dup-ed */
+extern char *
+node_attr_get(char *key)
+{
+ int i;
+ char *val = NULL;
+
+ debug3("mpi/pmi2: node_attr_get: key=%s", key);
+
+ for (i = 0; i < na_cnt; i ++) {
+ if (! strcmp(key, node_attr[KEY_INDEX(i)])) {
+ val = node_attr[VAL_INDEX(i)];
+ break;
+ }
+ }
+
+ debug3("mpi/pmi2: out node_attr_get: val=%s", val);
+ return val;
+}
+
+/* returned value not dup-ed */
+extern char *
+job_attr_get(char *key)
+{
+ static char attr[PMI2_MAX_VALLEN];
+
+ if (!strcmp(key, JOB_ATTR_PROC_MAP)) {
+ return job_info.proc_mapping;
+ }
+
+ if (!strcmp(key, JOB_ATTR_UNIV_SIZE)) {
+ snprintf(attr, PMI2_MAX_VALLEN, "%d", job_info.ntasks);
+ return attr;
+ }
+
+ return NULL;
+}
+
+
diff --git a/src/plugins/mpi/pmi2/info.h b/src/plugins/mpi/pmi2/info.h
new file mode 100644
index 0000000..dcdfb25
--- /dev/null
+++ b/src/plugins/mpi/pmi2/info.h
@@ -0,0 +1,60 @@
+/*****************************************************************************\
+ ** info.h - node/job info
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifndef _INFO_H
+#define _INFO_H
+
+#if HAVE_CONFIG_H
+# include "config.h"
+# if HAVE_INTTYPES_H
+# include <inttypes.h>
+# else
+# if HAVE_STDINT_H
+# include <stdint.h>
+# endif
+# endif /* HAVE_INTTYPES_H */
+#else /* !HAVE_CONFIG_H */
+# include <inttypes.h>
+#endif /* HAVE_CONFIG_H */
+
+extern char *job_attr_get(char *key);
+extern char *node_attr_get(char *key);
+extern int node_attr_put(char *key, char *val);
+extern int enqueue_nag_req(int fd, int rank, char *key);
+
+
+#endif /* _INFO_H */
diff --git a/src/plugins/mpi/pmi2/kvs.c b/src/plugins/mpi/pmi2/kvs.c
new file mode 100644
index 0000000..4c29a15
--- /dev/null
+++ b/src/plugins/mpi/pmi2/kvs.c
@@ -0,0 +1,299 @@
+/*****************************************************************************\
+ ** kvs.c - KVS manipulation functions
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#include <stdlib.h>
+
+#include "kvs.h"
+#include "setup.h"
+#include "tree.h"
+#include "pmi.h"
+
+/* for fence */
+int tasks_to_wait = 0;
+int children_to_wait = 0;
+
+
+/* bucket of key-value pairs */
+typedef struct kvs_bucket {
+ char **pairs;
+ uint32_t count;
+ uint32_t size;
+} kvs_bucket_t;
+
+static kvs_bucket_t *kvs_hash = NULL;
+static uint32_t hash_size = 0;
+
+static char *temp_kvs_buf = NULL;
+static int temp_kvs_cnt = 0;
+static int temp_kvs_size = 0;
+
+static int no_dup_keys = 0;
+
+#define TASKS_PER_BUCKET 8
+#define TEMP_KVS_SIZE_INC 2048
+
+#define KEY_INDEX(i) (i * 2)
+#define VAL_INDEX(i) (i * 2 + 1)
+#define HASH(key) ( _hash(key) % hash_size)
+
+inline static uint32_t
+_hash(char *key)
+{
+ int len, i;
+ uint32_t hash = 0;
+ uint8_t shift;
+
+ len = strlen(key);
+ for (i = 0; i < len; i ++) {
+ shift = (uint8_t)(hash >> 24);
+ hash = (hash << 8) | (uint32_t)(shift ^ (uint8_t)key[i]);
+ }
+ return hash;
+}
+
+extern int
+temp_kvs_init(void)
+{
+ uint16_t cmd;
+ uint32_t nodeid, num_children, size;
+ Buf buf = NULL;
+
+ xfree(temp_kvs_buf);
+ temp_kvs_cnt = 0;
+ temp_kvs_size = TEMP_KVS_SIZE_INC;
+ temp_kvs_buf = xmalloc(temp_kvs_size);
+
+ /* put the tree cmd here to simplify message sending */
+ if (in_stepd()) {
+ cmd = TREE_CMD_KVS_FENCE;
+ } else {
+ cmd = TREE_CMD_KVS_FENCE_RESP;
+ }
+
+ buf = init_buf(1024);
+ pack16(cmd, buf);
+ if (in_stepd()) {
+ nodeid = job_info.nodeid;
+ /* XXX: TBC */
+ num_children = tree_info.num_children + 1;
+
+ pack32((uint32_t)nodeid, buf); /* from_nodeid */
+ packstr(tree_info.this_node, buf); /* from_node */
+ pack32((uint32_t)num_children, buf); /* num_children */
+ }
+ size = get_buf_offset(buf);
+ if (temp_kvs_cnt + size > temp_kvs_size) {
+ temp_kvs_size += TEMP_KVS_SIZE_INC;
+ xrealloc(temp_kvs_buf, temp_kvs_size);
+ }
+ memcpy(&temp_kvs_buf[temp_kvs_cnt], get_buf_data(buf), size);
+ temp_kvs_cnt += size;
+ free_buf(buf);
+
+ tasks_to_wait = 0;
+ children_to_wait = 0;
+
+ return SLURM_SUCCESS;
+}
+
+extern int
+temp_kvs_add(char *key, char *val)
+{
+ Buf buf;
+ uint32_t size;
+
+ if ( key == NULL || val == NULL )
+ return SLURM_SUCCESS;
+
+ buf = init_buf(PMI2_MAX_KEYLEN + PMI2_MAX_VALLEN + 2 * sizeof(uint32_t));
+ packstr(key, buf);
+ packstr(val, buf);
+ size = get_buf_offset(buf);
+ if (temp_kvs_cnt + size > temp_kvs_size) {
+ temp_kvs_size += TEMP_KVS_SIZE_INC;
+ xrealloc(temp_kvs_buf, temp_kvs_size);
+ }
+ memcpy(&temp_kvs_buf[temp_kvs_cnt], get_buf_data(buf), size);
+ temp_kvs_cnt += size;
+ free_buf(buf);
+
+ return SLURM_SUCCESS;
+}
+
+extern int
+temp_kvs_merge(Buf buf)
+{
+ char *data;
+ uint32_t offset, size;
+
+ size = remaining_buf(buf);
+ if (size == 0) {
+ return SLURM_SUCCESS;
+ }
+ data = get_buf_data(buf);
+ offset = get_buf_offset(buf);
+
+ if (temp_kvs_cnt + size > temp_kvs_size) {
+ temp_kvs_size += size;
+ xrealloc(temp_kvs_buf, temp_kvs_size);
+ }
+ memcpy(&temp_kvs_buf[temp_kvs_cnt], &data[offset], size);
+ temp_kvs_cnt += size;
+
+ return SLURM_SUCCESS;
+}
+
+extern int
+temp_kvs_send(void)
+{
+ int rc;
+
+ /* cmd included in temp_kvs_buf */
+
+ if (! in_stepd()) { /* srun */
+ rc = tree_msg_to_stepds(job_info.step_nodelist,
+ temp_kvs_cnt,
+ temp_kvs_buf);
+ } else if (tree_info.parent_node != NULL) {
+ /* non-first-level stepds */
+ rc = tree_msg_to_stepds(tree_info.parent_node,
+ temp_kvs_cnt,
+ temp_kvs_buf);
+ } else { /* first level stepds */
+ rc = tree_msg_to_srun(temp_kvs_cnt, temp_kvs_buf);
+ }
+
+ temp_kvs_init(); /* clear old temp kvs */
+ return rc;
+}
+
+/**************************************************************/
+
+extern int
+kvs_init(void)
+{
+ debug3("mpi/pmi2: in kvs_init");
+
+ hash_size = ((job_info.ntasks + TASKS_PER_BUCKET - 1) / TASKS_PER_BUCKET);
+
+ kvs_hash = xmalloc(hash_size * sizeof(kvs_bucket_t));
+
+ if (getenv(PMI2_KVS_NO_DUP_KEYS_ENV))
+ no_dup_keys = 1;
+
+ return SLURM_SUCCESS;
+}
+
+/*
+ * returned value is not dup-ed
+ */
+extern char *
+kvs_get(char *key)
+{
+ kvs_bucket_t *bucket;
+ char *val = NULL;
+ int i;
+
+ debug3("mpi/pmi2: in kvs_get, key=%s", key);
+
+ bucket = &kvs_hash[HASH(key)];
+ if (bucket->count > 0) {
+ for(i = 0; i < bucket->count; i ++) {
+ if (! strcmp(key, bucket->pairs[KEY_INDEX(i)])) {
+ val = bucket->pairs[VAL_INDEX(i)];
+ break;
+ }
+ }
+ }
+
+ debug3("mpi/pmi2: out kvs_get, val=%s", val);
+
+ return val;
+}
+
+extern int
+kvs_put(char *key, char *val)
+{
+ kvs_bucket_t *bucket;
+ int i;
+
+ debug3("mpi/pmi2: in kvs_put");
+
+ bucket = &kvs_hash[HASH(key)];
+
+ if (! no_dup_keys) {
+ for (i = 0; i < bucket->count; i ++) {
+ if (! strcmp(key, bucket->pairs[KEY_INDEX(i)])) {
+ /* replace the k-v pair */
+ xfree(bucket->pairs[VAL_INDEX(i)]);
+ bucket->pairs[VAL_INDEX(i)] = xstrdup(val);
+ debug("mpi/pmi2: put kvs %s=%s", key, val);
+ return SLURM_SUCCESS;
+ }
+ }
+ }
+ if (bucket->count * 2 >= bucket->size) {
+ bucket->size += (TASKS_PER_BUCKET * 2);
+ xrealloc(bucket->pairs, bucket->size * sizeof(char *));
+ }
+ /* add the k-v pair */
+ i = bucket->count;
+ bucket->pairs[KEY_INDEX(i)] = xstrdup(key);
+ bucket->pairs[VAL_INDEX(i)] = xstrdup(val);
+ bucket->count ++;
+
+ debug3("mpi/pmi2: put kvs %s=%s", key, val);
+ return SLURM_SUCCESS;
+}
+
+extern int
+kvs_clear(void)
+{
+ kvs_bucket_t *bucket;
+ int i, j;
+
+ for (i = 0; i < hash_size; i ++){
+ bucket = &kvs_hash[i];
+ for (j = 0; j < bucket->count; j ++) {
+ xfree (bucket->pairs[KEY_INDEX(j)]);
+ xfree (bucket->pairs[VAL_INDEX(j)]);
+ }
+ }
+ xfree(kvs_hash);
+
+ return SLURM_SUCCESS;
+}
diff --git a/src/plugins/mpi/pmi2/kvs.h b/src/plugins/mpi/pmi2/kvs.h
new file mode 100644
index 0000000..6747f8e
--- /dev/null
+++ b/src/plugins/mpi/pmi2/kvs.h
@@ -0,0 +1,65 @@
+/*****************************************************************************\
+ ** kvs.h - KVS manipulation functions
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifndef _KVS_H
+#define _KVS_H
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <slurm/slurm_errno.h>
+#include "src/common/slurm_xlator.h"
+#include "src/common/xstring.h"
+#include "src/common/pack.h"
+
+
+extern int tasks_to_wait;
+extern int children_to_wait;
+
+extern int temp_kvs_init(void);
+extern int temp_kvs_add(char *key, char *val);
+extern int temp_kvs_merge(Buf buf);
+extern int temp_kvs_send(void);
+
+extern int kvs_init(void);
+extern char *kvs_get(char *key);
+extern int kvs_put(char *key, char *val);
+extern int kvs_clear(void);
+
+
+#endif /* _KVS_H */
diff --git a/src/plugins/mpi/pmi2/mpi_pmi2.c b/src/plugins/mpi/pmi2/mpi_pmi2.c
new file mode 100644
index 0000000..811bad7
--- /dev/null
+++ b/src/plugins/mpi/pmi2/mpi_pmi2.c
@@ -0,0 +1,182 @@
+/*****************************************************************************\
+ ** mpi_pmi2.c - Library routines for initiating MPI jobs using PMI2.
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+
+#include <slurm/slurm_errno.h>
+#include "src/common/slurm_xlator.h"
+#include "src/common/mpi.h"
+
+#include "setup.h"
+#include "agent.h"
+#include "spawn.h"
+
+/*
+ * These variables are required by the generic plugin interface. If they
+ * are not found in the plugin, the plugin loader will ignore it.
+ *
+ * plugin_name - a string giving a human-readable description of the
+ * plugin. There is no maximum length, but the symbol must refer to
+ * a valid string.
+ *
+ * plugin_type - a string suggesting the type of the plugin or its
+ * applicability to a particular form of data or method of data handling.
+ * If the low-level plugin API is used, the contents of this string are
+ * unimportant and may be anything. SLURM uses the higher-level plugin
+ * interface which requires this string to be of the form
+ *
+ * <application>/<method>
+ *
+ * where <application> is a description of the intended application of
+ * the plugin (e.g., "switch" for SLURM switch) and <method> is a description
+ * of how this plugin satisfies that application. SLURM will only load
+ * a switch plugin if the plugin_type string has a prefix of "switch/".
+ *
+ * plugin_version - an unsigned 32-bit integer giving the version number
+ * of the plugin. If major and minor revisions are desired, the major
+ * version number may be multiplied by a suitable magnitude constant such
+ * as 100 or 1000. Various SLURM versions will likely require a certain
+ * minimum versions for their plugins as this API matures.
+ */
+const char plugin_name[] = "mpi PMI2 plugin";
+const char plugin_type[] = "mpi/pmi2";
+const uint32_t plugin_version = 100;
+
+/*
+ * The following is executed in slurmstepd.
+ */
+
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job,
+ char ***env)
+{
+ int rc;
+
+ debug("using mpi/pmi2");
+
+ if (job->batch)
+ return SLURM_SUCCESS;
+
+ rc = pmi2_setup_stepd(job, env);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ if (pmi2_start_agent() < 0) {
+ error ("mpi/pmi2: failed to create pmi2 agent thread");
+ return SLURM_ERROR;
+ }
+
+ return SLURM_SUCCESS;
+}
+
+int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job,
+ char ***env)
+{
+ int i;
+
+ env_array_overwrite_fmt(env, "PMI_FD", "%u",
+ TASK_PMI_SOCK(job->ltaskid));
+
+ env_array_overwrite_fmt(env, "PMI_JOBID", "%s",
+ job_info.pmi_jobid);
+ env_array_overwrite_fmt(env, "PMI_RANK", "%u", job->gtaskid);
+ env_array_overwrite_fmt(env, "PMI_SIZE", "%u", job->ntasks);
+ if (job_info.spawn_seq) { /* PMI1.1 needs this env-var */
+ env_array_overwrite_fmt(env, "PMI_SPAWNED", "%u", 1);
+ }
+ /* close unused sockets in task */
+ close(tree_sock);
+ tree_sock = 0;
+ for (i = 0; i < job->ltasks; i ++) {
+ close(STEPD_PMI_SOCK(i));
+ STEPD_PMI_SOCK(i) = 0;
+ if (i != job->ltaskid) {
+ close(TASK_PMI_SOCK(i));
+ TASK_PMI_SOCK(i) = 0;
+ }
+ }
+ return SLURM_SUCCESS;
+}
+
+
+/*
+ * The following is executed in srun.
+ */
+
+mpi_plugin_client_state_t *
+p_mpi_hook_client_prelaunch(mpi_plugin_client_info_t *job, char ***env)
+{
+ int rc;
+
+ debug("mpi/pmi2: client_prelaunch");
+
+ rc = pmi2_setup_srun(job, env);
+ if (rc != SLURM_SUCCESS) {
+ return NULL;
+ }
+
+ if (pmi2_start_agent() < 0) {
+ error("failed to start PMI2 agent thread");
+ return NULL;
+ }
+
+ return (void *)0x12345678;
+}
+
+int p_mpi_hook_client_single_task_per_node(void)
+{
+ return false;
+}
+
+int p_mpi_hook_client_fini(mpi_plugin_client_state_t *state)
+{
+
+ pmi2_stop_agent();
+
+ /* the job may be allocated by this srun.
+ * or exit of this srun may cause the job script to exit.
+ * wait for the spawned steps. */
+ spawn_job_wait();
+
+ return SLURM_SUCCESS;
+}
+
diff --git a/src/plugins/mpi/pmi2/pmi.h b/src/plugins/mpi/pmi2/pmi.h
new file mode 100644
index 0000000..876190e
--- /dev/null
+++ b/src/plugins/mpi/pmi2/pmi.h
@@ -0,0 +1,235 @@
+/*****************************************************************************\
+ ** pmi.h - PMI common definitions
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifndef _PMI_H
+#define _PMI_H
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/***********************************************************\
+ * PMI1 definitions
+\***********************************************************/
+/* from src/pmi/simple/simple_pmiutil.h */
+#define PMIU_MAXLINE 1024
+
+/* from src/pm/hydra/pm/pmiserv/pmi_common.h */
+#define MAXKEYLEN 64 /* max length of key in keyval space */
+#define MAXVALLEN 1024 /* max length of value in keyval space */
+#define MAXNAMELEN 256 /* max length of various names */
+#define MAXKVSNAME MAXNAMELEN /* max length of a kvsname */
+
+
+#define GETMAXES_CMD "get_maxes"
+#define MAXES_CMD "maxes"
+#define GETUNIVSIZE_CMD "get_universe_size"
+#define UNIVSIZE_CMD "universe_size"
+#define GETAPPNUM_CMD "get_appnum"
+#define APPNUM_CMD "appnum"
+#define BARRIERIN_CMD "barrier_in"
+#define BARRIEROUT_CMD "barrier_out"
+#define FINALIZE_CMD "finalize"
+#define FINALIZEACK_CMD "finalize_ack"
+#define ABORT_CMD "abort"
+#define PUT_CMD "put"
+#define PUTRESULT_CMD "put_result"
+#define GET_CMD "get"
+#define GETRESULT_CMD "get_result"
+#define GETBYIDX_CMD "getbyidx"
+#define GETBYIDXRESULTS_CMD "getbyidx_results"
+#define SPAWNRESULT_CMD "spawn_result"
+
+#define MCMD_KEY "mcmd"
+#define ENDCMD_KEY "endcmd"
+#define KVSNAMEMAX_KEY "kvsname_max"
+#define KEYLENMAX_KEY "keylen_max"
+#define VALLENMAX_KEY "vallen_max"
+#define SIZE_KEY "size"
+#define APPNUM_KEY "appnum"
+#define EXECNAME_KEY "execname"
+#define NPROCS_KEY "nprocs"
+#define ARGCNT_KEY "argcnt"
+#define INFONUM_KEY "info_num"
+#define TOTSPAWNS_KEY "totspawns"
+#define SPAWNSSOFAR_KEY "spawnssofar"
+#define KVSNAME_KEY "kvsname"
+#define PREPUTNUM_KEY "preput_num"
+#define PREPUTKEY_KEY "preput_key_"
+#define PREPUTVAL_KEY "preput_val_"
+
+
+/***********************************************************\
+ * PMI2 definitions
+\***********************************************************/
+
+/* from src/include/pmi2.h of mpich2 */
+#define PMI2_SUCCESS 0
+#define PMI2_FAIL -1
+#define PMI2_ERR_INIT 1
+#define PMI2_ERR_NOMEM 2
+#define PMI2_ERR_INVALID_ARG 3
+#define PMI2_ERR_INVALID_KEY 4
+#define PMI2_ERR_INVALID_KEY_LENGTH 5
+#define PMI2_ERR_INVALID_VAL 6
+#define PMI2_ERR_INVALID_VAL_LENGTH 7
+#define PMI2_ERR_INVALID_LENGTH 8
+#define PMI2_ERR_INVALID_NUM_ARGS 9
+#define PMI2_ERR_INVALID_ARGS 10
+#define PMI2_ERR_INVALID_NUM_PARSED 11
+#define PMI2_ERR_INVALID_KEYVALP 12
+#define PMI2_ERR_INVALID_SIZE 13
+#define PMI2_ERR_OTHER 14
+
+#define PMI2_MAX_KEYLEN 64
+#define PMI2_MAX_VALLEN 1024
+#define PMI2_MAX_ATTRVALUE 1024
+#define PMI2_ID_NULL -1
+
+/* modified from src/pmi/pmi2/simple2pmi.h of mpich2 */
+#define FULLINIT_CMD "fullinit"
+#define FULLINITRESP_CMD "fullinit-response"
+#define FINALIZE_CMD "finalize"
+#define FINALIZERESP_CMD "finalize-response"
+#define ABORT_CMD "abort"
+#define JOBGETID_CMD "job-getid"
+#define JOBGETIDRESP_CMD "job-getid-response"
+#define JOBCONNECT_CMD "job-connect"
+#define JOBCONNECTRESP_CMD "job-connect-response"
+#define JOBDISCONNECT_CMD "job-disconnect"
+#define JOBDISCONNECTRESP_CMD "job-disconnect-response"
+#define KVSPUT_CMD "kvs-put"
+#define KVSPUTRESP_CMD "kvs-put-response"
+#define KVSFENCE_CMD "kvs-fence"
+#define KVSFENCERESP_CMD "kvs-fence-response"
+#define KVSGET_CMD "kvs-get"
+#define KVSGETRESP_CMD "kvs-get-response"
+#define GETNODEATTR_CMD "info-getnodeattr"
+#define GETNODEATTRRESP_CMD "info-getnodeattr-response"
+#define PUTNODEATTR_CMD "info-putnodeattr"
+#define PUTNODEATTRRESP_CMD "info-putnodeattr-response"
+#define GETJOBATTR_CMD "info-getjobattr"
+#define GETJOBATTRRESP_CMD "info-getjobattr-response"
+#define NAMEPUBLISH_CMD "name-publish"
+#define NAMEPUBLISHRESP_CMD "name-publish-response"
+#define NAMEUNPUBLISH_CMD "name-unpublish"
+#define NAMEUNPUBLISHRESP_CMD "name-unpublish-response"
+#define NAMELOOKUP_CMD "name-lookup"
+#define NAMELOOKUPRESP_CMD "name-lookup-response"
+#define SPAWN_CMD "spawn"
+#define SPAWNRESP_CMD "spawn-response"
+
+#define GETMYKVSNAME_CMD "get_my_kvsname"
+#define GETMYKVSNAMERESP_CMD "my_kvsname"
+#define CREATEKVS_CMD "create_kvs"
+#define DESTROYKVS_CMD "destroy_kvs"
+#define PUBLISHNAME_CMD "publish_name"
+#define UNPUBLISHNAME_CMD "unpublish_name"
+#define LOOKUPNAME_CMD "lookup_name"
+#define MCMD_CMD "mcmd"
+
+
+#define CMD_KEY "cmd"
+#define PMIJOBID_KEY "pmijobid"
+#define PMIRANK_KEY "pmirank"
+#define SRCID_KEY "srcid"
+#define THREADED_KEY "threaded"
+#define RC_KEY "rc"
+#define ERRMSG_KEY "errmsg"
+#define PMIVERSION_KEY "pmi-version"
+#define PMISUBVER_KEY "pmi-subversion"
+#define RANK_KEY "rank"
+#define SIZE_KEY "size"
+#define APPNUM_KEY "appnum"
+#define SPAWNERJOBID_KEY "spawner-jobid"
+#define DEBUGGED_KEY "debugged"
+#define PMIVERBOSE_KEY "pmiverbose"
+#define ISWORLD_KEY "isworld"
+#define MSG_KEY "msg"
+#define JOBID_KEY "jobid"
+#define KVSCOPY_KEY "kvscopy"
+#define KEY_KEY "key"
+#define VALUE_KEY "value"
+#define FOUND_KEY "found"
+#define WAIT_KEY "wait"
+#define NAME_KEY "name"
+#define PORT_KEY "port"
+#define THRID_KEY "thrid"
+#define INFOKEYCOUNT_KEY "infokeycount"
+#define INFOKEY_KEY "infokey"
+#define INFOVAL_KEY "infoval"
+#define FOUND_KEY "found"
+#define NCMDS_KEY "ncmds"
+#define PREPUTCOUNT_KEY "preputcount"
+#define PPKEY_KEY "ppkey"
+#define PPVAL_KEY "ppval"
+#define SUBCMD_KEY "subcmd"
+#define MAXPROCS_KEY "maxprocs"
+#define ARGC_KEY "argc"
+#define ARGV_KEY "argv"
+#define INFOKEYCOUNT_KEY "infokeycount"
+#define ERRCODES_KEY "errcodes"
+
+#define TRUE_VAL "TRUE"
+#define FALSE_VAL "FALSE"
+
+#define JOB_ATTR_PROC_MAP "PMI_process_mapping"
+#define JOB_ATTR_UNIV_SIZE "universeSize"
+
+/***********************************************************\
+ * Environment variables
+\***********************************************************/
+#define PMI2_SRUN_PORT_ENV "SLURM_PMI2_SRUN_PORT"
+#define PMI2_STEP_NODES_ENV "SLURM_PMI2_STEP_NODES"
+#define PMI2_TREE_WIDTH_ENV "SLURM_PMI2_TREE_WIDTH"
+#define PMI2_PROC_MAPPING_ENV "SLURM_PMI2_PROC_MAPPING"
+#define PMI2_PMI_JOBID_ENV "SLURM_PMI2_PMI_JOBID"
+#define PMI2_SPAWN_SEQ_ENV "SLURM_PMI2_SPAWN_SEQ"
+#define PMI2_SPAWNER_JOBID_ENV "SLURM_PMI2_SPAWNER_JOBID"
+#define PMI2_SPAWNER_PORT_ENV "SLURM_PMI2_SPAWNER_PORT"
+#define PMI2_PREPUT_CNT_ENV "SLURM_PMI2_PREPUT_COUNT"
+#define PMI2_PPKEY_ENV "SLURM_PMI2_PPKEY"
+#define PMI2_PPVAL_ENV "SLURM_PMI2_PPVAL"
+/* old PMIv1 envs */
+#define PMI2_PMI_DEBUGGED_ENV "PMI_DEBUG"
+#define PMI2_KVS_NO_DUP_KEYS_ENV "SLURM_PMI_KVS_NO_DUP_KEYS"
+
+
+extern int handle_pmi1_cmd(int fd, int lrank);
+extern int handle_pmi2_cmd(int fd, int lrank);
+
+#endif /* _PMI_H */
diff --git a/src/plugins/mpi/pmi2/pmi1.c b/src/plugins/mpi/pmi2/pmi1.c
new file mode 100644
index 0000000..04df1d4
--- /dev/null
+++ b/src/plugins/mpi/pmi2/pmi1.c
@@ -0,0 +1,540 @@
+/*****************************************************************************\
+ ** pmi1.c - PMI1 client(task) command handling
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <slurm/slurm_errno.h>
+#include "src/common/slurm_xlator.h"
+#include "src/common/xmalloc.h"
+#include "src/common/log.h"
+
+#include "pmi.h"
+#include "client.h"
+#include "spawn.h"
+#include "setup.h"
+#include "kvs.h"
+#include "agent.h"
+
+/* client command handlers */
+static int _handle_get_maxes(int fd, int lrank, client_req_t *req);
+static int _handle_get_universe_size(int fd, int lrank, client_req_t *req);
+static int _handle_get_appnum(int fd, int lrank, client_req_t *req);
+static int _handle_barrier_in(int fd, int lrank, client_req_t *req);
+static int _handle_finalize(int fd, int lrank, client_req_t *req);
+static int _handle_abort(int fd, int lrank, client_req_t *req);
+static int _handle_get_my_kvsname(int fd, int lrank, client_req_t *req);
+static int _handle_create_kvs(int fd, int lrank, client_req_t *req);
+static int _handle_destroy_kvs(int fd, int lrank, client_req_t *req);
+static int _handle_put(int fd, int lrank, client_req_t *req);
+static int _handle_get(int fd, int lrank, client_req_t *req);
+static int _handle_getbyidx(int fd, int lrank ,client_req_t *req);
+static int _handle_publish_name(int fd, int lrank, client_req_t *req);
+static int _handle_unpublish_name(int fd, int lrank, client_req_t *req);
+static int _handle_lookup_name(int fd, int lrank, client_req_t *req);
+static int _handle_mcmd(int fd, int lrank, client_req_t *req);
+
+/* dispatch table mapping PMI1 wire command names to their handlers;
+ * terminated by a NULL sentinel entry */
+static struct {
+	char *cmd;
+	int (*handler)(int fd, int lrank, client_req_t *req);
+} pmi1_cmd_handlers[] = {
+	{ GETMAXES_CMD,     _handle_get_maxes },
+	{ GETUNIVSIZE_CMD,  _handle_get_universe_size },
+	{ GETAPPNUM_CMD,    _handle_get_appnum },
+	{ BARRIERIN_CMD,    _handle_barrier_in },
+	{ FINALIZE_CMD,     _handle_finalize },
+	{ ABORT_CMD,        _handle_abort },
+	{ GETMYKVSNAME_CMD, _handle_get_my_kvsname },
+	{ CREATEKVS_CMD,    _handle_create_kvs },
+	{ DESTROYKVS_CMD,   _handle_destroy_kvs },
+	{ PUT_CMD,          _handle_put },
+	{ GET_CMD,          _handle_get },
+	{ GETBYIDX_CMD,     _handle_getbyidx },
+	{ PUBLISHNAME_CMD,  _handle_publish_name },
+	{ UNPUBLISHNAME_CMD, _handle_unpublish_name },
+	{ LOOKUPNAME_CMD,   _handle_lookup_name },
+	{ MCMD_CMD,         _handle_mcmd },
+	{ NULL, NULL},
+};
+
+/* spawn request being assembled from successive "mcmd" messages
+ * (see _handle_mcmd); NULL when no spawn is in progress */
+static spawn_req_t *pmi1_spawn = NULL;
+
+/* Handle PMI1 "get_maxes": report the maximum lengths accepted for
+ * kvsname, key and value.  Returns the result of sending the reply. */
+static int
+_handle_get_maxes(int fd, int lrank, client_req_t *req)
+{
+	int rc = 0;
+	client_resp_t *resp;
+
+	debug3("mpi/pmi2: in _handle_get_maxes");
+
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="MAXES_CMD" " RC_KEY"=%d "
+			   KVSNAMEMAX_KEY"=%d " KEYLENMAX_KEY"=%d "
+			   VALLENMAX_KEY"=%d\n",
+			   rc, MAXKVSNAME, MAXKEYLEN, MAXVALLEN);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: out _handle_get_maxes");
+	/* propagate the send result instead of an unconditional
+	 * SLURM_SUCCESS, consistent with _handle_get_my_kvsname */
+	return rc;
+}
+
+/* Handle PMI1 "get_universe_size": report job_info.ntasks as the
+ * universe size.  Returns the result of sending the reply. */
+static int
+_handle_get_universe_size(int fd, int lrank, client_req_t *req)
+{
+	int rc = 0;
+	client_resp_t *resp;
+
+	debug3("mpi/pmi2: in _handle_get_universe_size");
+
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="UNIVSIZE_CMD" " RC_KEY"=%d "
+			   SIZE_KEY"=%d\n",
+			   rc, job_info.ntasks);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: out _handle_get_universe_size");
+	/* propagate the send result instead of an unconditional
+	 * SLURM_SUCCESS, consistent with _handle_get_my_kvsname */
+	return rc;
+}
+
+/* Handle PMI1 "get_appnum": always answers appnum=-1 (see TODO).
+ * Returns the result of sending the reply. */
+static int
+_handle_get_appnum(int fd, int lrank, client_req_t *req)
+{
+	int rc = 0;
+	client_resp_t *resp;
+
+	debug3("mpi/pmi2: in _handle_get_appnum");
+
+	resp = client_resp_new();
+	/*
+	 * TODO: spawn_multiple: order number of command
+	 *       spawn: 0
+	 *       otherwise: -1, since no way to get the order
+	 *       number from multi-prog conf
+	 */
+	client_resp_append(resp, CMD_KEY"="APPNUM_CMD" " RC_KEY"=%d "
+			   APPNUM_KEY"=-1\n", rc);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: out _handle_get_appnum");
+	/* propagate the send result instead of an unconditional
+	 * SLURM_SUCCESS, consistent with _handle_get_my_kvsname */
+	return rc;
+}
+
+/*
+ * Handle PMI1 "barrier_in": count down local tasks and (via the
+ * tree fanout) child stepds; when both counters reach zero, push
+ * the accumulated temporary KVS upward with temp_kvs_send().
+ * No reply is sent here; NOTE(review): tasks are presumably
+ * released when the fence result comes back down the tree --
+ * confirm against kvs.c/agent.c.
+ */
+static int
+_handle_barrier_in(int fd, int lrank, client_req_t *req)
+{
+	int rc = 0;
+
+	debug3("mpi/pmi2: in _handle_barrier_in, from task %d",
+	       job_info.gtids[lrank]);
+	/* (re)arm the counters at the start of a new barrier round */
+	if (tasks_to_wait == 0 && children_to_wait == 0) {
+		tasks_to_wait = job_info.ltasks;
+		children_to_wait = tree_info.num_children;
+	}
+	tasks_to_wait --;
+
+	/* mutex protection is not required */
+	if (tasks_to_wait == 0 && children_to_wait == 0) {
+		rc = temp_kvs_send();
+	}
+	debug3("mpi/pmi2: out _handle_barrier_in, tasks_to_wait=%d, "
+	       "children_to_wait=%d", tasks_to_wait, children_to_wait);
+	return rc;
+}
+
+/*
+ * Handle PMI1 "finalize": acknowledge, then shut down and close the
+ * task's PMI fd and mark the task finalized via task_finalize().
+ */
+static int
+_handle_finalize(int fd, int lrank, client_req_t *req)
+{
+	client_resp_t *resp;
+	int rc = 0;
+
+	debug3("mpi/pmi2: in _handle_finalize");
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="FINALIZEACK_CMD" "
+			   RC_KEY"=%d\n", rc);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+	debug3("mpi/pmi2: out _handle_finalize");
+	/* shutdown the PMI fd */
+	shutdown(fd, SHUT_RDWR);
+	close(fd);
+	task_finalize(lrank);
+	return rc;
+}
+
+/*
+ * Handle PMI1 "abort": kill the whole job step with SIGKILL.
+ * No reply is sent to the task.
+ */
+static int
+_handle_abort(int fd, int lrank, client_req_t *req)
+{
+	debug3("mpi/pmi2: in _handle_abort");
+	/* no response needed. just cancel the job */
+	slurm_kill_job_step(job_info.jobid, job_info.stepid, SIGKILL);
+	debug3("mpi/pmi2: out _handle_abort");
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Handle PMI1 "get_my_kvsname": the KVS name is "<jobid>.<stepid>".
+ * Returns the result of sending the reply.
+ */
+static int
+_handle_get_my_kvsname(int fd, int lrank, client_req_t *req)
+{
+	client_resp_t *resp;
+	int rc = 0;
+
+	debug3("mpi/pmi2: in _handle_get_my_kvsname");
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="GETMYKVSNAMERESP_CMD" "
+			   RC_KEY"=%d " KVSNAME_KEY"=%u.%u\n",
+			   rc, job_info.jobid, job_info.stepid);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+	debug3("mpi/pmi2: out _handle_get_my_kvsname");
+	return rc;
+}
+
+/* PMI1 "create_kvs": unsupported; logs an error and fails.
+ * No reply is sent to the requesting task. */
+static int
+_handle_create_kvs(int fd, int lrank, client_req_t *req)
+{
+	/* not used in MPICH2 */
+	error("mpi/pmi2: PMI1 request of '" CREATEKVS_CMD "' not supported");
+	return SLURM_ERROR;
+}
+
+/* PMI1 "destroy_kvs": unsupported; logs an error and fails.
+ * No reply is sent to the requesting task. */
+static int
+_handle_destroy_kvs(int fd, int lrank, client_req_t *req)
+{
+	/* not used in MPICH2 */
+	error("mpi/pmi2: PMI1 request of '" DESTROYKVS_CMD "' not supported");
+	return SLURM_ERROR;
+}
+
+
+/*
+ * Handle PMI1 "put": stage the key-value pair into the temporary KVS
+ * (temp_kvs_add) to be forwarded at the next barrier.  The kvsname
+ * from the request is parsed but ignored.  The wire rc is 0 on
+ * success, 1 on failure (PMI1 convention).
+ */
+static int
+_handle_put(int fd, int lrank, client_req_t *req)
+{
+	int rc = SLURM_SUCCESS;
+	client_resp_t *resp;
+	char *kvsname = NULL, *key = NULL, *val = NULL;
+
+	debug3("mpi/pmi2: in _handle_put");
+
+	client_req_parse_body(req);
+	client_req_get_str(req, KVSNAME_KEY, &kvsname); /* not used */
+	client_req_get_str(req, KEY_KEY, &key);
+	client_req_get_str(req, VALUE_KEY, &val);
+
+	/* no need to add k-v to hash. just get it ready to be up-forward */
+	rc = temp_kvs_add(key, val);
+	/* map SLURM result onto the PMI1 wire rc (0 = ok, 1 = error) */
+	if (rc == SLURM_SUCCESS)
+		rc = 0;
+	else
+		rc = 1;
+
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="PUTRESULT_CMD" " RC_KEY"=%d\n", rc);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: out _handle_put");
+	return rc;
+}
+
+/*
+ * Handle PMI1 "get": look the key up in the local KVS (kvs_get) and
+ * reply with rc=0 plus the value if found, rc=1 otherwise.  The
+ * kvsname from the request is parsed but ignored.
+ */
+static int
+_handle_get(int fd, int lrank, client_req_t *req)
+{
+	int rc;
+	client_resp_t *resp;
+	char *kvsname = NULL, *key = NULL, *val = NULL;
+
+	debug3("mpi/pmi2: in _handle_get");
+
+	client_req_parse_body(req);
+	client_req_get_str(req, KVSNAME_KEY, &kvsname); /* not used */
+	client_req_get_str(req, KEY_KEY, &key);
+
+	val = kvs_get(key);
+
+	resp = client_resp_new();
+	if (val != NULL) {
+		client_resp_append(resp, CMD_KEY"="GETRESULT_CMD" "
+				   RC_KEY"=0 " VALUE_KEY"=%s\n", val);
+	} else {
+		client_resp_append(resp, CMD_KEY"="GETRESULT_CMD" "
+				   RC_KEY"=1\n");
+	}
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: out _handle_get");
+	return rc;
+}
+
+
+/* PMI1 "getbyidx": unsupported; logs an error and fails.
+ * NOTE(review): no reply is sent, so a client that waits for one
+ * will block -- confirm MPICH2 never issues these. */
+static int
+_handle_getbyidx(int fd, int lrank, client_req_t *req)
+{
+	/* not used in MPICH2 */
+	error("mpi/pmi2: PMI1 request of '" GETBYIDX_CMD "' not supported");
+	return SLURM_ERROR;
+}
+
+/* PMI1 "publish_name": unsupported; logs an error and fails. */
+static int
+_handle_publish_name(int fd, int lrank, client_req_t *req)
+{
+	error("mpi/pmi2: PMI1 request of '" PUBLISHNAME_CMD "' not supported");
+	return SLURM_ERROR;
+}
+
+/* PMI1 "unpublish_name": unsupported; logs an error and fails. */
+static int
+_handle_unpublish_name(int fd, int lrank, client_req_t *req)
+{
+	error("mpi/pmi2: PMI1 request of '" UNPUBLISHNAME_CMD "' not supported");
+	return SLURM_ERROR;
+}
+
+/* PMI1 "lookup_name": unsupported; logs an error and fails. */
+static int
+_handle_lookup_name(int fd, int lrank, client_req_t *req)
+{
+	error("mpi/pmi2: PMI1 request of '" LOOKUPNAME_CMD "' not supported");
+	return SLURM_ERROR;
+}
+
+/*
+ * Handle PMI1 "mcmd" (spawn): the sub-commands of one spawn request
+ * arrive in separate messages.  The first (spawnssofar == 1)
+ * allocates the aggregate request pmi1_spawn and parses the preput
+ * key-value list; each message contributes one subcmd.  Once all
+ * totspawns sub-commands have arrived, the request is sent to srun;
+ * the srun response is matched back to this task later through
+ * spawn_psr_enqueue(), so no success reply is sent here.
+ */
+static int
+_handle_mcmd(int fd, int lrank, client_req_t *req)
+{
+	spawn_subcmd_t *subcmd = NULL;
+	spawn_resp_t *spawn_resp = NULL;
+	client_resp_t *task_resp = NULL;
+	int spawnssofar = 0, rc = SLURM_SUCCESS, i;
+	char buf[64];
+
+	debug3("mpi/pmi2: in _handle_mcmd");
+
+	client_req_parse_body(req);
+	subcmd = client_req_parse_spawn_subcmd(req);
+
+	debug3("mpi/pmi2: got subcmd");
+
+	client_req_get_int(req, SPAWNSSOFAR_KEY, &spawnssofar);
+	if (spawnssofar == 1) {
+		pmi1_spawn = spawn_req_new();
+		client_req_get_int(req, TOTSPAWNS_KEY,
+				   (int *)&pmi1_spawn->subcmd_cnt);
+		pmi1_spawn->subcmds = xmalloc(pmi1_spawn->subcmd_cnt *
+					      sizeof(spawn_subcmd_t *));
+		client_req_get_int(req, PREPUTNUM_KEY,
+				   (int *)&pmi1_spawn->preput_cnt);
+		pmi1_spawn->pp_keys =
+			xmalloc(pmi1_spawn->preput_cnt * sizeof(char *));
+		pmi1_spawn->pp_vals =
+			xmalloc(pmi1_spawn->preput_cnt * sizeof(char *));
+		for (i = 0; i < pmi1_spawn->preput_cnt; i ++) {
+			snprintf(buf, 64, PREPUTKEY_KEY"%d", i);
+			client_req_get_str(req, buf, &pmi1_spawn->pp_keys[i]);
+			snprintf(buf, 64, PREPUTVAL_KEY"%d", i);
+			client_req_get_str(req, buf, &pmi1_spawn->pp_vals[i]);
+		}
+	}
+	/* guard against out-of-sequence or malformed sub-commands: the
+	 * original code dereferenced pmi1_spawn unconditionally and would
+	 * crash if the first message was missing or spawnssofar bogus */
+	if ((pmi1_spawn == NULL) || (spawnssofar < 1) ||
+	    (spawnssofar > pmi1_spawn->subcmd_cnt)) {
+		error("mpi/pmi2: unexpected spawn sub-command %d of %d",
+		      spawnssofar, pmi1_spawn ? pmi1_spawn->subcmd_cnt : 0);
+		return SLURM_ERROR;
+	}
+	pmi1_spawn->subcmds[spawnssofar - 1] = subcmd;
+
+	if (spawnssofar == pmi1_spawn->subcmd_cnt) {
+		debug3("mpi/pmi2: got whole spawn req");
+		/* a resp will be send back from srun.
+		   this will not be forwarded to the tasks */
+		rc = spawn_req_send_to_srun(pmi1_spawn, &spawn_resp);
+		/* also check the send result: on failure spawn_resp may be
+		 * NULL and must not be dereferenced */
+		if ((rc != SLURM_SUCCESS) ||
+		    (spawn_resp->rc != SLURM_SUCCESS)) {
+			int err = (rc != SLURM_SUCCESS) ? rc : spawn_resp->rc;
+			task_resp = client_resp_new();
+			client_resp_append(task_resp, CMD_KEY"="SPAWNRESP_CMD";"
+					   RC_KEY"=%d;"
+					   ERRMSG_KEY"=spawn failed;",
+					   err);
+			client_resp_send(task_resp, fd);
+			client_resp_free(task_resp);
+
+			if (spawn_resp)
+				spawn_resp_free(spawn_resp);
+			spawn_req_free(pmi1_spawn);
+			pmi1_spawn = NULL;
+			error("mpi/pmi2: spawn failed");
+			rc = SLURM_ERROR;
+			goto out;
+		}
+
+		debug("mpi/pmi2: spawn request sent to srun");
+		spawn_psr_enqueue(spawn_resp->seq, fd, lrank, NULL);
+
+		spawn_resp_free(spawn_resp);
+		spawn_req_free(pmi1_spawn);
+		pmi1_spawn = NULL;
+	}
+out:
+	debug3("mpi/pmi2: out _handle_mcmd");
+	return rc;
+}
+
+/**************************************************/
+
+/* from src/pmi/simple/simeple_pmiutil.c */
+#define MAX_READLINE 1024
+
+/* Dispatch one complete PMI1 command held in buf (buf_len bytes).
+ * buf is taken over by the request object and xfree-ed via
+ * client_req_free().  NOTE(review): if client_req_init() fails,
+ * buf is NOT freed here -- verify whether init frees it. */
+static int
+_handle_pmi1_cmd_buf(int fd, int lrank, int buf_len, char *buf)
+{
+	client_req_t *req = NULL;
+	int i = 0, rc;
+
+	debug3("mpi/pmi2: got client request: %s", buf);
+
+	/* buf taken by req */
+	req = client_req_init(buf_len, buf);
+	if (req == NULL) {
+		error("mpi/pmi2: invalid client request");
+		return SLURM_ERROR;
+	}
+
+	/* linear scan of the dispatch table; ends at the NULL sentinel */
+	i = 0;
+	while (pmi1_cmd_handlers[i].cmd != NULL) {
+		if (!strcmp(req->cmd, pmi1_cmd_handlers[i].cmd))
+			break;
+		i ++;
+	}
+	if (pmi1_cmd_handlers[i].cmd == NULL) {
+		error("mpi/pmi2: invalid pmi1 command received: '%s'", req->cmd);
+		rc = SLURM_ERROR;
+	} else {
+		rc = pmi1_cmd_handlers[i].handler(fd, lrank, req);
+	}
+	client_req_free(req);	/* free buf */
+
+	return rc;
+}
+
+/* Read the remainder of an "mcmd" request (terminated by "endcmd\n",
+ * possibly spanning multiple reads) and dispatch every contained
+ * sub-command.  *pbuf is not xfree-ed here; it may be xrealloc-ed,
+ * in which case *pbuf is updated so the caller frees the new buffer.
+ * NOTE(review): assumes buf_len >= strlen("endcmd\n") on entry;
+ * confirm the caller guarantees that. */
+static int
+_handle_pmi1_mcmd_buf(int fd, int lrank, int buf_size, int buf_len, char **pbuf)
+{
+	int n, len, endcmd_len, not_end;
+	char *cmd_buf = NULL, *tmp_buf = NULL, *tmp_ptr = NULL, *buf;
+	int rc = SLURM_SUCCESS;
+
+	/* read until "endcmd\n" */
+	buf = *pbuf;
+	n = buf_len;
+	endcmd_len = strlen(ENDCMD_KEY"\n");
+	not_end = strncmp(&buf[n - endcmd_len], ENDCMD_KEY"\n", endcmd_len);
+	while(not_end) {
+		if (n == buf_size) {
+			/* grow the caller's buffer; keep *pbuf current */
+			buf_size += MAX_READLINE;
+			xrealloc(buf, buf_size + 1);
+			*pbuf = buf;
+		}
+		while((len = read(fd, &buf[n], buf_size - n)) < 0
+		      && errno == EINTR );
+		if (len < 0) {
+			error("mpi/pmi2: failed to read PMI1 request");
+			return SLURM_ERROR;
+		} else if (len == 0) {
+			/* nothing new yet; back off briefly and retry */
+			debug("mpi/pmi2: read partial mcmd: %s", buf);
+			usleep(100);
+		} else {
+			n += len;
+			not_end = strncmp(&buf[n - endcmd_len],
+					  ENDCMD_KEY"\n", endcmd_len);
+		}
+	}
+	buf[n] = '\0';
+
+	/* there maybe multiple subcmds in the buffer */
+	tmp_buf = buf;
+	tmp_ptr = NULL;
+	while (tmp_buf[0] != '\0') {
+		tmp_ptr = strstr(tmp_buf, ENDCMD_KEY"\n");
+		if ( tmp_ptr == NULL) {
+			/* bail out instead of dereferencing NULL below
+			 * (the original logged and then crashed) */
+			error("mpi/pmi2: this is impossible");
+			rc = SLURM_ERROR;
+			break;
+		}
+		*tmp_ptr = '\0';
+		n = tmp_ptr - tmp_buf;
+		cmd_buf = xstrdup(tmp_buf);
+		rc = _handle_pmi1_cmd_buf(fd, lrank, n, cmd_buf);
+		if (rc != SLURM_SUCCESS)
+			break;
+		tmp_buf = tmp_ptr + endcmd_len;
+	}
+
+	return rc;
+}
+
+/*
+ * Entry point: read one PMI1 request from fd and dispatch it.
+ * A request starting with "mcmd=" may span multiple reads and is
+ * completed by _handle_pmi1_mcmd_buf(); any other request is assumed
+ * to fit in a single read of MAX_READLINE bytes (see TODO below).
+ * Ownership: in the mcmd branch buf is freed here; otherwise buf is
+ * handed to _handle_pmi1_cmd_buf(), which frees it.
+ */
+extern int
+handle_pmi1_cmd(int fd, int lrank)
+{
+	char *buf = NULL;
+	int n, len, size, rc = SLURM_SUCCESS;
+
+	debug3("mpi/pmi2: in handle_pmi1_cmd");
+
+	/* TODO: read until newline */
+	size = MAX_READLINE;
+	buf = xmalloc(size + 1);
+	/* retry reads interrupted by signals */
+	while ( (n = read(fd, buf, size)) < 0 && errno == EINTR );
+	if (n < 0) {
+		error("mpi/pmi2: failed to read PMI1 request");
+		xfree(buf);
+		return SLURM_ERROR;
+	} else if (n == 0) {
+		error("mpi/pmi2: read length 0");
+		xfree(buf);
+		return SLURM_ERROR;
+	}
+
+	len = strlen(MCMD_KEY"=");
+	if (! strncmp(buf, MCMD_KEY"=", len)) {
+		rc = _handle_pmi1_mcmd_buf(fd, lrank, size, n, &buf);
+		xfree(buf);
+	} else {
+		buf[n] = '\0';
+		rc = _handle_pmi1_cmd_buf(fd, lrank, n, buf);
+	}
+	debug3("mpi/pmi2: out handle_pmi1_cmd");
+	return rc;
+}
+
diff --git a/src/plugins/mpi/pmi2/pmi2.c b/src/plugins/mpi/pmi2/pmi2.c
new file mode 100644
index 0000000..ab26e92
--- /dev/null
+++ b/src/plugins/mpi/pmi2/pmi2.c
@@ -0,0 +1,502 @@
+/*****************************************************************************\
+ ** pmi2.c - PMI2 client(task) command handling
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <stdlib.h>
+
+#include "src/common/slurm_xlator.h"
+#include "src/common/log.h"
+
+#include "pmi.h"
+#include "client.h"
+#include "spawn.h"
+#include "kvs.h"
+#include "info.h"
+#include "setup.h"
+#include "agent.h"
+
+/* PMI2 command handlers */
+static int _handle_fullinit(int fd, int lrank, client_req_t *req);
+static int _handle_finalize(int fd, int lrank, client_req_t *req);
+static int _handle_abort(int fd, int lrank, client_req_t *req);
+static int _handle_job_getid(int fd, int lrank, client_req_t *req);
+static int _handle_job_connect(int fd, int lrank, client_req_t *req);
+static int _handle_job_disconnect(int fd, int lrank, client_req_t *req);
+static int _handle_kvs_put(int fd, int lrank, client_req_t *req);
+static int _handle_kvs_fence(int fd, int lrank, client_req_t *req);
+static int _handle_kvs_get(int fd, int lrank, client_req_t *req);
+static int _handle_info_getnodeattr(int fd, int lrank, client_req_t *req);
+static int _handle_info_putnodeattr(int fd, int lrank, client_req_t *req);
+static int _handle_info_getjobattr(int fd, int lrank, client_req_t *req);
+static int _handle_name_publish(int fd, int lrank, client_req_t *req);
+static int _handle_name_unpublish(int fd, int lrank, client_req_t *req);
+static int _handle_name_lookup(int fd, int lrank, client_req_t *req);
+static int _handle_spawn(int fd, int lrank, client_req_t *req);
+
+
+/* dispatch table mapping PMI2 wire command names to their handlers;
+ * terminated by a NULL sentinel entry */
+static struct {
+	char *cmd;
+	int (*handler)(int fd, int lrank, client_req_t *req);
+} pmi2_cmd_handlers[] = {
+	{ FULLINIT_CMD,      _handle_fullinit },
+	{ FINALIZE_CMD,      _handle_finalize },
+	{ ABORT_CMD,         _handle_abort },
+	{ JOBGETID_CMD,      _handle_job_getid },
+	{ JOBCONNECT_CMD,    _handle_job_connect },
+	{ JOBDISCONNECT_CMD, _handle_job_disconnect },
+	{ KVSPUT_CMD,        _handle_kvs_put },
+	{ KVSFENCE_CMD,      _handle_kvs_fence },
+	{ KVSGET_CMD,        _handle_kvs_get },
+	{ GETNODEATTR_CMD,   _handle_info_getnodeattr },
+	{ PUTNODEATTR_CMD,   _handle_info_putnodeattr },
+	{ GETJOBATTR_CMD,    _handle_info_getjobattr },
+	{ NAMEPUBLISH_CMD,   _handle_name_publish },
+	{ NAMEUNPUBLISH_CMD, _handle_name_unpublish },
+	{ NAMELOOKUP_CMD,    _handle_name_lookup },
+	{ SPAWN_CMD,         _handle_spawn },
+	{ NULL, NULL},
+};
+
+
+/*
+ * Handle PMI2 "fullinit": validate the required keys (pmijobid,
+ * pmirank, threaded) and reply with the PMI2 version, this task's
+ * rank (job_info.gtids[lrank]), the job size and debug flags.
+ * On a missing key the response still goes out, carrying
+ * rc=PMI2_ERR_INVALID_ARG.  The parsed pmi_jobid/pmi_rank/threaded
+ * values are currently unused (see TODO).
+ */
+static int
+_handle_fullinit(int fd, int lrank, client_req_t *req)
+{
+	int pmi_jobid, pmi_rank;
+	bool threaded;
+	int found, rc = PMI2_SUCCESS;
+	client_resp_t *resp;
+
+	debug3("mpi/pmi2: _handle_fullinit");
+
+	client_req_parse_body(req);
+
+	found = client_req_get_int(req, PMIJOBID_KEY, &pmi_jobid);
+	if (! found) {
+		error(PMIJOBID_KEY" missing in fullinit command");
+		rc = PMI2_ERR_INVALID_ARG;
+		goto response;
+	}
+	found = client_req_get_int(req, PMIRANK_KEY, &pmi_rank);
+	if (! found) {
+		error(PMIRANK_KEY" missing in fullinit command");
+		rc = PMI2_ERR_INVALID_ARG;
+		goto response;
+	}
+	found = client_req_get_bool(req, THREADED_KEY, &threaded);
+	if (! found) {
+		error(THREADED_KEY" missing in fullinit command");
+		rc = PMI2_ERR_INVALID_ARG;
+		goto response;
+	}
+
+	/* TODO: use threaded */
+
+response:
+	resp = client_resp_new();
+	/* what's the difference between DEBUGGED and VERBOSE? */
+	/* TODO: APPNUM */
+	client_resp_append(resp, CMD_KEY"="FULLINITRESP_CMD";" RC_KEY"=%d;"
+			   PMIVERSION_KEY"=%d;" PMISUBVER_KEY"=%d;"
+			   RANK_KEY"=%d;" SIZE_KEY"=%d;"
+			   APPNUM_KEY"=-1;" DEBUGGED_KEY"="FALSE_VAL";"
+			   PMIVERBOSE_KEY"=%s;",
+			   rc,
+			   PMI20_VERSION, PMI20_SUBVERSION,
+			   job_info.gtids[lrank], job_info.ntasks,
+			   (job_info.pmi_debugged ? TRUE_VAL : FALSE_VAL));
+	/* a spawned job reports its spawner so the tasks can connect back */
+	if (job_info.spawner_jobid) {
+		client_resp_append(resp, SPAWNERJOBID_KEY"=%s;",
+				   job_info.spawner_jobid);
+	}
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: fullinit done");
+	return rc;
+}
+
+/*
+ * Handle PMI2 "finalize": acknowledge, then shut down and close the
+ * task's PMI fd and mark the task finalized via task_finalize().
+ */
+static int
+_handle_finalize(int fd, int lrank, client_req_t *req)
+{
+	client_resp_t *resp;
+	int rc = 0;
+
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="FINALIZERESP_CMD";"
+			   RC_KEY"=%d;", rc);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+	/* shutdown the PMI fd */
+	shutdown(fd, SHUT_RDWR);
+	close(fd);
+	task_finalize(lrank);
+	return rc;
+}
+
+/*
+ * Handle PMI2 "abort": kill the whole job step with SIGKILL.
+ * No reply is sent to the task.
+ */
+static int
+_handle_abort(int fd, int lrank, client_req_t *req)
+{
+	int rc = SLURM_SUCCESS;
+
+	debug3("mpi/pmi2: in _handle_abort");
+	/* no response needed. just cancel the job */
+	slurm_kill_job_step(job_info.jobid, job_info.stepid, SIGKILL);
+	return rc;
+}
+
+/*
+ * Handle PMI2 "job-getid": reply with job_info.pmi_jobid.
+ * Returns the result of sending the reply.
+ */
+static int
+_handle_job_getid(int fd, int lrank, client_req_t *req)
+{
+	int rc = SLURM_SUCCESS;
+	client_resp_t *resp;
+
+	debug3("mpi/pmi2: in _handle_job_getid");
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="JOBGETIDRESP_CMD";" RC_KEY"=0;"
+			   JOBID_KEY"=%s;", job_info.pmi_jobid);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+	debug3("mpi/pmi2: out _handle_job_getid");
+	return rc;
+}
+
+/* PMI2 "job-connect": not implemented.  NOTE(review): no reply is
+ * sent and SLURM_SUCCESS is returned, so a client waiting for a
+ * connect response will block -- confirm intended behavior. */
+static int
+_handle_job_connect(int fd, int lrank, client_req_t *req)
+{
+	int rc = SLURM_SUCCESS;
+	error("mpi/pmi2: job connect not implemented for now");
+	return rc;
+}
+
+/* PMI2 "job-disconnect": not implemented.  NOTE(review): same
+ * concern as _handle_job_connect -- no reply is sent. */
+static int
+_handle_job_disconnect(int fd, int lrank, client_req_t *req)
+{
+	int rc = SLURM_SUCCESS;
+	error("mpi/pmi2: job disconnect not implemented for now");
+	return rc;
+}
+
+/*
+ * Handle PMI2 "kvs-put": stage the key-value pair into the temporary
+ * KVS (temp_kvs_add) to be forwarded at the next fence, and reply
+ * with the staging result.
+ */
+static int
+_handle_kvs_put(int fd, int lrank, client_req_t *req)
+{
+	int rc = SLURM_SUCCESS;
+	client_resp_t *resp;
+	char *key = NULL, *val = NULL;
+
+	debug3("mpi/pmi2: in _handle_kvs_put");
+	client_req_parse_body(req);
+	client_req_get_str(req, KEY_KEY, &key);
+	client_req_get_str(req, VALUE_KEY, &val);
+
+	/* no need to add k-v to hash. just get it ready to be up-forward */
+	rc = temp_kvs_add(key, val);
+
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="KVSPUTRESP_CMD";" RC_KEY"=%d;", rc);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: out _handle_kvs_put");
+	return rc;
+}
+
+/*
+ * Handle PMI2 "kvs-fence": same counting scheme as the PMI1
+ * barrier -- count down local tasks and child stepds, and when both
+ * counters hit zero, push the temporary KVS upward with
+ * temp_kvs_send().  No reply is sent from here.
+ */
+static int
+_handle_kvs_fence(int fd, int lrank, client_req_t *req)
+{
+	int rc = 0;
+
+	debug3("mpi/pmi2: in _handle_kvs_fence, from task %d",
+	       job_info.gtids[lrank]);
+	/* (re)arm the counters at the start of a new fence round */
+	if (tasks_to_wait == 0 && children_to_wait == 0) {
+		tasks_to_wait = job_info.ltasks;
+		children_to_wait = tree_info.num_children;
+	}
+	tasks_to_wait --;
+
+	/* mutex protection is not required */
+	if (tasks_to_wait == 0 && children_to_wait == 0) {
+		rc = temp_kvs_send();
+	}
+	debug3("mpi/pmi2: out _handle_kvs_fence, tasks_to_wait=%d, "
+	       "children_to_wait=%d", tasks_to_wait, children_to_wait);
+	return rc;
+}
+
+
+/*
+ * Handle PMI2 "kvs-get": look the key up in the local KVS (kvs_get)
+ * and reply found=TRUE plus the value, or found=FALSE.  The wire rc
+ * is 0 in both cases; absence is reported via the found flag.
+ */
+static int
+_handle_kvs_get(int fd, int lrank, client_req_t *req)
+{
+	int rc;
+	client_resp_t *resp;
+	char *key, *val;
+
+	debug3("mpi/pmi2: in _handle_kvs_get");
+
+	client_req_parse_body(req);
+	client_req_get_str(req, KEY_KEY, &key);
+
+	val = kvs_get(key);
+
+	resp = client_resp_new();
+	if (val != NULL) {
+		client_resp_append(resp, CMD_KEY"="KVSGETRESP_CMD";"
+				   RC_KEY"=0;" FOUND_KEY"="TRUE_VAL";"
+				   VALUE_KEY"=%s;", val);
+	} else {
+		client_resp_append(resp, CMD_KEY"="KVSGETRESP_CMD";"
+				   RC_KEY"=0;" FOUND_KEY"="FALSE_VAL";");
+	}
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: out _handle_kvs_get");
+	return rc;
+}
+
+/*
+ * Handle PMI2 "info-getnodeattr": look up a node-level attribute
+ * (node_attr_get).  If it is missing and the request set wait=TRUE,
+ * the reply is deferred by queuing a "nag" request (enqueue_nag_req)
+ * instead of answering found=FALSE immediately.
+ */
+static int
+_handle_info_getnodeattr(int fd, int lrank, client_req_t *req)
+{
+	int rc = 0;
+	client_resp_t *resp;
+	char *key, *val;
+	bool wait = false;
+
+	debug3("mpi/pmi2: in _handle_info_getnodeattr from lrank %d", lrank);
+
+	client_req_parse_body(req);
+	client_req_get_str(req, KEY_KEY, &key);
+	client_req_get_bool(req, WAIT_KEY, &wait);
+
+	val = node_attr_get(key);
+
+	/* answer now unless the attribute is absent and wait was requested */
+	if (val != NULL || (! wait)) {
+		resp = client_resp_new();
+		client_resp_append(resp, CMD_KEY"="GETNODEATTRRESP_CMD";"
+				   RC_KEY"=0;" );
+		if (val == NULL) {
+			client_resp_append(resp, FOUND_KEY"="FALSE_VAL";" );
+		} else {
+			client_resp_append(resp, FOUND_KEY"="TRUE_VAL";"
+					   VALUE_KEY"=%s;", val);
+		}
+		rc = client_resp_send(resp, fd);
+		client_resp_free(resp);
+	} else {
+		rc = enqueue_nag_req(fd, lrank, key);
+	}
+
+	debug3("mpi/pmi2: out _handle_info_getnodeattr");
+	return rc;
+}
+
+/*
+ * Handle PMI2 "info-putnodeattr": store a node-level attribute via
+ * node_attr_put and reply with its result.
+ */
+static int
+_handle_info_putnodeattr(int fd, int lrank, client_req_t *req)
+{
+	char *key, *val;
+	client_resp_t *resp;
+	int rc = 0;
+
+	debug3("mpi/pmi2: in _handle_info_putnodeattr");
+
+	client_req_parse_body(req);
+	client_req_get_str(req, KEY_KEY, &key);
+	client_req_get_str(req, VALUE_KEY, &val);
+
+	rc = node_attr_put(key, val);
+
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="PUTNODEATTRRESP_CMD";" RC_KEY"=%d;", rc);
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: out _handle_info_putnodeattr");
+	return rc;
+}
+
+/*
+ * Handle PMI2 "info-getjobattr": look up a job-level attribute
+ * (job_attr_get) and reply found=TRUE plus the value, or
+ * found=FALSE.  The wire rc is 0 in both cases.
+ */
+static int
+_handle_info_getjobattr(int fd, int lrank, client_req_t *req)
+{
+	char *key, *val;
+	client_resp_t *resp;
+	int rc;
+
+	debug3("mpi/pmi2: in _handle_info_getjobattr");
+	client_req_parse_body(req);
+	client_req_get_str(req, KEY_KEY, &key);
+
+	val = job_attr_get(key);
+
+	resp = client_resp_new();
+	client_resp_append(resp, CMD_KEY"="GETJOBATTRRESP_CMD";" RC_KEY"=0;");
+	if (val != NULL) {
+		client_resp_append(resp, FOUND_KEY"="TRUE_VAL";" VALUE_KEY"=%s;",
+				   val);
+	} else {
+		client_resp_append(resp, FOUND_KEY"="FALSE_VAL";");
+	}
+
+	rc = client_resp_send(resp, fd);
+	client_resp_free(resp);
+
+	debug3("mpi/pmi2: out _handle_info_getjobattr");
+	return rc;
+}
+
+/* The PMI2 name-service commands are not implemented; each handler
+ * logs an error and fails, so the failure is visible in the slurmd
+ * log (the original unpublish/lookup handlers failed silently,
+ * inconsistent with publish and with the PMI1 handlers in pmi1.c). */
+static int
+_handle_name_publish(int fd, int lrank, client_req_t *req)
+{
+	error("mpi/pmi2: name publish not implemented");
+	return SLURM_ERROR;
+}
+
+static int
+_handle_name_unpublish(int fd, int lrank, client_req_t *req)
+{
+	error("mpi/pmi2: name unpublish not implemented");
+	return SLURM_ERROR;
+}
+
+static int
+_handle_name_lookup(int fd, int lrank, client_req_t *req)
+{
+	error("mpi/pmi2: name lookup not implemented");
+	return SLURM_ERROR;
+}
+
+/*
+ * Handle PMI2 "spawn": parse the request and forward it to srun.
+ * The srun response is routed back to this task later through
+ * spawn_psr_enqueue(), so no success reply is sent here.  On any
+ * failure a spawn-resp carrying an error rc is sent to the task.
+ */
+static int
+_handle_spawn(int fd, int lrank, client_req_t *req)
+{
+	int rc;
+	spawn_req_t *spawn_req = NULL;
+	spawn_resp_t *spawn_resp = NULL;
+	client_resp_t *task_resp;
+
+	debug3("mpi/pmi2: in _handle_spawn");
+
+	client_req_parse_body(req);
+	spawn_req = client_req_parse_spawn_req(req);
+	if (spawn_req == NULL) {
+		task_resp = client_resp_new();
+		client_resp_append(task_resp, CMD_KEY"="SPAWNRESP_CMD";"
+				   RC_KEY"=%d;"
+				   ERRMSG_KEY"=invalid command;",
+				   PMI2_ERR_INVALID_ARGS);
+		client_resp_send(task_resp, fd);
+		client_resp_free(task_resp);
+		return SLURM_ERROR;
+	}
+
+	/* a resp will be send back from srun.
+	 * this will not be forwarded to the tasks */
+	rc = spawn_req_send_to_srun(spawn_req, &spawn_resp);
+	/* also check the send result: on failure spawn_resp may be NULL,
+	 * and the original unconditional spawn_resp->rc would crash */
+	if ((rc != SLURM_SUCCESS) || (spawn_resp->rc != SLURM_SUCCESS)) {
+		int err = (rc != SLURM_SUCCESS) ? rc : spawn_resp->rc;
+		task_resp = client_resp_new();
+		client_resp_append(task_resp, CMD_KEY"="SPAWNRESP_CMD";"
+				   RC_KEY"=%d;"
+				   ERRMSG_KEY"=spawn failed;",
+				   err);
+		client_resp_send(task_resp, fd);
+		client_resp_free(task_resp);
+		spawn_req_free(spawn_req);
+		if (spawn_resp)
+			spawn_resp_free(spawn_resp);
+		debug("mpi/pmi2: spawn failed");
+		return SLURM_ERROR;
+	}
+
+	debug3("mpi/pmi2: spawn request sent to srun");
+	spawn_psr_enqueue(spawn_resp->seq, fd, lrank, NULL);
+
+	spawn_req_free(spawn_req);
+	spawn_resp_free(spawn_resp);
+	debug3("mpi/pmi2: out _handle_spawn");
+	return rc;
+}
+
+/**************************************************/
+
+/*
+ * Entry point: read one PMI2 request from fd and dispatch it.  The
+ * PMI2 wire format is a 6-character ASCII decimal length followed by
+ * the command body.
+ * NOTE(review): atoi() on len_buf is unvalidated -- a non-numeric
+ * length yields 0; confirm the client-side framing guarantees it.
+ * NOTE(review): buf is handed to client_req_init(); if init fails,
+ * buf appears to leak here -- verify whether init frees it.
+ */
+extern int
+handle_pmi2_cmd(int fd, int lrank)
+{
+	int i, len;
+	char len_buf[7], *buf = NULL;
+	client_req_t *req = NULL;
+	int rc = SLURM_SUCCESS;
+
+	debug3("mpi/pmi2: in handle_pmi2_cmd");
+
+	/* read the 6-byte length header, then exactly len body bytes */
+	safe_read(fd, len_buf, 6);
+	len_buf[6] = '\0';
+	len = atoi(len_buf);
+	buf = xmalloc(len + 1);
+	safe_read(fd, buf, len);
+	buf[len] = '\0';
+
+	debug2("mpi/pmi2: got client request: %s %s", len_buf, buf);
+
+	req = client_req_init(len, buf);
+	if (req == NULL) {
+		error("mpi/pmi2: invalid client request");
+		return SLURM_ERROR;
+	}
+
+	/* linear scan of the dispatch table; ends at the NULL sentinel */
+	i = 0;
+	while (pmi2_cmd_handlers[i].cmd != NULL) {
+		if (!strcmp(req->cmd, pmi2_cmd_handlers[i].cmd))
+			break;
+		i ++;
+	}
+	if (pmi2_cmd_handlers[i].cmd == NULL) {
+		error("mpi/pmi2: invalid pmi2 command received: '%s'", req->cmd);
+		rc = SLURM_ERROR;
+	} else {
+		rc = pmi2_cmd_handlers[i].handler(fd, lrank, req);
+	}
+	client_req_free(req);
+
+	debug3("mpi/pmi2: out handle_pmi2_cmd");
+
+	return rc;
+
+rwfail:	/* jump target of safe_read on I/O failure */
+	xfree(buf);
+	return SLURM_ERROR;
+}
+
diff --git a/src/plugins/mpi/pmi2/setup.c b/src/plugins/mpi/pmi2/setup.c
new file mode 100644
index 0000000..6338bb3
--- /dev/null
+++ b/src/plugins/mpi/pmi2/setup.c
@@ -0,0 +1,729 @@
+/*****************************************************************************\
+ ** setup.c - PMI2 server setup
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <poll.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dlfcn.h>
+
+#include "src/common/slurm_xlator.h"
+#include "src/common/mpi.h"
+#include "src/common/xstring.h"
+#include "src/common/proc_args.h"
+#include "src/common/net.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+#include "src/slurmd/slurmd/reverse_tree_math.h"
+
+#include "setup.h"
+#include "tree.h"
+#include "pmi.h"
+#include "spawn.h"
+#include "kvs.h"
+
+#define PMI2_SOCK_ADDR_FMT "/tmp/sock.pmi2.%u.%u"
+
+
+extern char **environ;
+
+static bool run_in_stepd = 0;
+
+int tree_sock;
+int *task_socks;
+char tree_sock_addr[128];
+pmi2_job_info_t job_info;
+pmi2_tree_info_t tree_info;
+
+extern bool
+in_stepd(void)
+{
+ return run_in_stepd;
+}
+
+static void
+_remove_tree_sock(void)
+{
+ unlink(tree_sock_addr);
+}
+
+static int
+_setup_stepd_job_info(const slurmd_job_t *job, char ***env)
+{
+ char *p;
+ int i;
+
+ memset(&job_info, 0, sizeof(job_info));
+
+ job_info.jobid = job->jobid;
+ job_info.stepid = job->stepid;
+ job_info.nnodes = job->nnodes;
+ job_info.nodeid = job->nodeid;
+ job_info.ntasks = job->ntasks;
+ job_info.ltasks = job->node_tasks;
+ job_info.gtids = xmalloc(job->node_tasks * sizeof(uint32_t));
+ for (i = 0; i < job->node_tasks; i ++) {
+ job_info.gtids[i] = job->task[i]->gtid;
+ }
+
+ p = getenvp(*env, PMI2_PMI_DEBUGGED_ENV);
+ if (p) {
+ job_info.pmi_debugged = atoi(p);
+ } else {
+ job_info.pmi_debugged = 0;
+ }
+ p = getenvp(*env, PMI2_SPAWN_SEQ_ENV);
+ if (p) { /* spawned */
+ job_info.spawn_seq = atoi(p);
+ unsetenvp(*env, PMI2_SPAWN_SEQ_ENV);
+ p = getenvp(*env, PMI2_SPAWNER_JOBID_ENV);
+ job_info.spawner_jobid = xstrdup(p);
+ unsetenvp(*env, PMI2_SPAWNER_JOBID_ENV);
+ } else {
+ job_info.spawn_seq = 0;
+ job_info.spawner_jobid = NULL;
+ }
+ p = getenvp(*env, PMI2_PMI_JOBID_ENV);
+ if (p) {
+ job_info.pmi_jobid = xstrdup(p);
+ unsetenvp(*env, PMI2_PMI_JOBID_ENV);
+ } else {
+ xstrfmtcat(job_info.pmi_jobid, "%u.%u", job->jobid,
+ job->stepid);
+ }
+ p = getenvp(*env, PMI2_STEP_NODES_ENV);
+ if (!p) {
+ error("mpi/pmi2: unable to find nodes in job environment");
+ return SLURM_ERROR;
+ } else {
+ job_info.step_nodelist = xstrdup(p);
+ unsetenvp(*env, PMI2_STEP_NODES_ENV);
+ }
+ /*
+ * how to get the mapping info from stepd directly?
+ * there is the task distribution info in the launch_tasks_request_msg_t,
+ * but it is not stored in the slurmd_job_t.
+ */
+ p = getenvp(*env, PMI2_PROC_MAPPING_ENV);
+ if (!p) {
+ error("PMI2_PROC_MAPPING_ENV not found");
+ return SLURM_ERROR;
+ } else {
+ job_info.proc_mapping = xstrdup(p);
+ unsetenvp(*env, PMI2_PROC_MAPPING_ENV);
+ }
+
+ job_info.job_env = env_array_copy((const char **)*env);
+
+ job_info.srun_job = NULL;
+ job_info.srun_opt = NULL;
+
+ return SLURM_SUCCESS;
+}
+
+static int
+_setup_stepd_tree_info(const slurmd_job_t *job, char ***env)
+{
+ hostlist_t hl;
+ char srun_host[64];
+ uint16_t port;
+ char *p;
+ int tree_width;
+
+ /* job info available */
+
+ memset(&tree_info, 0, sizeof(tree_info));
+
+ hl = hostlist_create(job_info.step_nodelist);
+ p = hostlist_nth(hl, job_info.nodeid); /* strdup-ed */
+ tree_info.this_node = xstrdup(p);
+ free(p);
+
+ /* this only controls the upward communication tree width */
+ p = getenvp(*env, PMI2_TREE_WIDTH_ENV);
+ if (p) {
+ tree_width = atoi(p);
+ if (tree_width < 2) {
+ info("invalid PMI2 tree width value (%d) detected. "
+ "fallback to default value.", tree_width);
+ tree_width = slurm_get_tree_width();
+ }
+ } else {
+ tree_width = slurm_get_tree_width();
+ }
+
+ /* TODO: cannot launch 0 tasks on node */
+
+ /*
+ * In tree position calculation, root of the tree is srun with id 0.
+ * Stepd's id will be its nodeid plus 1.
+ */
+ reverse_tree_info(job_info.nodeid + 1, job_info.nnodes + 1,
+ tree_width, &tree_info.parent_id,
+ &tree_info.num_children, &tree_info.depth,
+ &tree_info.max_depth);
+ tree_info.parent_id --; /* restore real nodeid */
+ if (tree_info.parent_id < 0) { /* parent is srun */
+ tree_info.parent_node = NULL;
+ } else {
+ p = hostlist_nth(hl, tree_info.parent_id);
+ tree_info.parent_node = xstrdup(p);
+ free(p);
+ }
+ hostlist_destroy(hl);
+
+ tree_info.pmi_port = 0; /* not used */
+
+ p = getenvp(*env, "SLURM_SRUN_COMM_HOST");
+ if (!p) {
+ error("mpi/pmi2: unable to find srun comm ifhn in env");
+ return SLURM_ERROR;
+ } else {
+ strncpy(srun_host, p, 64);
+ }
+ p = getenvp(*env, PMI2_SRUN_PORT_ENV);
+ if (!p) {
+ error("mpi/pmi2: unable to find srun pmi2 port in env");
+ return SLURM_ERROR;
+ } else {
+ port = atoi(p);
+ unsetenvp(*env, PMI2_SRUN_PORT_ENV);
+ }
+ tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t));
+ slurm_set_addr(tree_info.srun_addr, port, srun_host);
+
+ return SLURM_SUCCESS;
+}
+
+/*
+ * setup sockets for slurmstepd
+ */
+static int
+_setup_stepd_sockets(const slurmd_job_t *job, char ***env)
+{
+ struct sockaddr_un sa;
+ int i;
+
+ debug("mpi/pmi2: setup sockets");
+
+ tree_sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (tree_sock < 0) {
+ error("mpi/pmi2: failed to create tree socket: %m");
+ return SLURM_ERROR;
+ }
+ sa.sun_family = PF_UNIX;
+ snprintf(sa.sun_path, sizeof(sa.sun_path), PMI2_SOCK_ADDR_FMT,
+ job->jobid, job->stepid);
+ unlink(sa.sun_path); /* remove possible old socket */
+
+ if (bind(tree_sock, (struct sockaddr *)&sa, SUN_LEN(&sa)) < 0) {
+ error("mpi/pmi2: failed to bind tree socket: %m");
+ unlink(sa.sun_path);
+ return SLURM_ERROR;
+ }
+ if (listen(tree_sock, 64) < 0) {
+ error("mpi/pmi2: failed to listen tree socket: %m");
+ unlink(sa.sun_path);
+ return SLURM_ERROR;
+ }
+
+ /* remove the tree socket file on exit */
+ strncpy(tree_sock_addr, sa.sun_path, 128);
+ atexit(_remove_tree_sock);
+
+ task_socks = xmalloc(2 * job->node_tasks * sizeof(int));
+ for (i = 0; i < job->node_tasks; i ++) {
+ socketpair(AF_UNIX, SOCK_STREAM, 0, &task_socks[i * 2]);
+ /* this must be delayed after the tasks have been forked */
+/* close(TASK_PMI_SOCK(i)); */
+ }
+ return SLURM_SUCCESS;
+}
+
+static int
+_setup_stepd_kvs(const slurmd_job_t *job, char ***env)
+{
+ int rc = SLURM_SUCCESS, i = 0, pp_cnt = 0;
+ char *p, env_key[32], *ppkey, *ppval;
+
+ rc = temp_kvs_init();
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ rc = kvs_init();
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ /* preput */
+ p = getenvp(*env, PMI2_PREPUT_CNT_ENV);
+ if (p) {
+ pp_cnt = atoi(p);
+ }
+
+ for (i = 0; i < pp_cnt; i ++) {
+ snprintf(env_key, 32, PMI2_PPKEY_ENV"%d", i);
+ p = getenvp(*env, env_key);
+ ppkey = p; /* getenvp will not modify p */
+ snprintf(env_key, 32, PMI2_PPVAL_ENV"%d", i);
+ p = getenvp(*env, env_key);
+ ppval = p;
+ kvs_put(ppkey, ppval);
+ }
+
+ /*
+ * For PMI11.
+ * A better logic would be to put PMI_process_mapping in KVS only if
+ * the task distribution method is not "arbitrary", because in
+	 * "arbitrary" distribution the process mapping variable is not correct.
+ * MPICH2 may deduce the clique info from the hostnames. But that
+ * is rather costly.
+ */
+ kvs_put("PMI_process_mapping", job_info.proc_mapping);
+
+ return SLURM_SUCCESS;
+}
+
+extern int
+pmi2_setup_stepd(const slurmd_job_t *job, char ***env)
+{
+ int rc;
+
+ run_in_stepd = true;
+
+ /* job info */
+ rc = _setup_stepd_job_info(job, env);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ /* tree info */
+ rc = _setup_stepd_tree_info(job, env);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ /* sockets */
+ rc = _setup_stepd_sockets(job, env);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ /* kvs */
+ rc = _setup_stepd_kvs(job, env);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ return SLURM_SUCCESS;
+}
+
+/**************************************************************/
+
+/* returned string should be xfree-ed by caller */
+static char *
+_get_proc_mapping(const mpi_plugin_client_info_t *job)
+{
+ uint32_t node_cnt, task_cnt, task_mapped, node_task_cnt, **tids,
+ block;
+ uint16_t task_dist, *tasks, *rounds;
+ int i, start_id, end_id;
+ char *mapping = NULL;
+
+ node_cnt = job->step_layout->node_cnt;
+ task_cnt = job->step_layout->task_cnt;
+ task_dist = job->step_layout->task_dist;
+ tasks = job->step_layout->tasks;
+ tids = job->step_layout->tids;
+
+ /* for now, PMI2 only supports vector processor mapping */
+
+ if (task_dist == SLURM_DIST_CYCLIC ||
+ task_dist == SLURM_DIST_CYCLIC_CYCLIC ||
+ task_dist == SLURM_DIST_CYCLIC_BLOCK) {
+ mapping = xstrdup("(vector");
+
+ rounds = xmalloc (node_cnt * sizeof(uint16_t));
+ task_mapped = 0;
+ while (task_mapped < task_cnt) {
+ start_id = 0;
+ /* find start_id */
+ while (start_id < node_cnt) {
+ while (start_id < node_cnt &&
+ ( rounds[start_id] >= tasks[start_id] ||
+ (task_mapped !=
+ tids[start_id][rounds[start_id]]) )) {
+ start_id ++;
+ }
+ if (start_id >= node_cnt)
+ break;
+ /* block is always 1 */
+ /* find end_id */
+ end_id = start_id;
+ while (end_id < node_cnt &&
+ ( rounds[end_id] < tasks[end_id] &&
+ (task_mapped ==
+ tids[end_id][rounds[end_id]]) )) {
+ rounds[end_id] ++;
+ task_mapped ++;
+ end_id ++;
+ }
+ xstrfmtcat(mapping, ",(%u,%u,1)", start_id,
+ end_id - start_id);
+ start_id = end_id;
+ }
+ }
+ xfree(rounds);
+ xstrcat(mapping, ")");
+ } else if (task_dist == SLURM_DIST_ARBITRARY) {
+ /*
+		 * MPICH2 will think that each task runs on a separate node.
+ * The program will run, but no SHM will be used for
+ * communication.
+ */
+ mapping = xstrdup("(vector");
+ xstrfmtcat(mapping, ",(0,%u,1)", job->step_layout->task_cnt);
+ xstrcat(mapping, ")");
+
+ } else if (task_dist == SLURM_DIST_PLANE) {
+ mapping = xstrdup("(vector");
+
+ rounds = xmalloc (node_cnt * sizeof(uint16_t));
+ task_mapped = 0;
+ while (task_mapped < task_cnt) {
+ start_id = 0;
+ /* find start_id */
+ while (start_id < node_cnt) {
+ while (start_id < node_cnt &&
+ ( rounds[start_id] >= tasks[start_id] ||
+ (task_mapped !=
+ tids[start_id][rounds[start_id]]) )) {
+ start_id ++;
+ }
+ if (start_id >= node_cnt)
+ break;
+ /* find start block. block may be less than plane size */
+ block = 0;
+ while (rounds[start_id] < tasks[start_id] &&
+ (task_mapped ==
+ tids[start_id][rounds[start_id]])) {
+ block ++;
+ rounds[start_id] ++;
+ task_mapped ++;
+ }
+ /* find end_id */
+ end_id = start_id + 1;
+ while (end_id < node_cnt &&
+ (rounds[end_id] + block - 1 <
+ tasks[end_id])) {
+ for (i = 0; i < tasks[end_id] - rounds[end_id]; i ++) {
+ if (task_mapped + i !=
+ tids[end_id][rounds[end_id] + i]) {
+ break;
+ }
+ }
+ if (i != block)
+ break;
+ rounds[end_id] += block;
+ task_mapped += block;
+ end_id ++;
+ }
+ xstrfmtcat(mapping, ",(%u,%u,%u)", start_id,
+ end_id - start_id, block);
+ start_id = end_id;
+ }
+ }
+ xfree(rounds);
+ xstrcat(mapping, ")");
+
+ } else { /* BLOCK mode */
+ mapping = xstrdup("(vector");
+ start_id = 0;
+ node_task_cnt = tasks[start_id];
+ for (i = start_id + 1; i < node_cnt; i ++) {
+ if (node_task_cnt == tasks[i])
+ continue;
+ xstrfmtcat(mapping, ",(%u,%u,%hu)", start_id,
+ i - start_id, node_task_cnt);
+ start_id = i;
+ node_task_cnt = tasks[i];
+ }
+ xstrfmtcat(mapping, ",(%u,%u,%hu))", start_id, i - start_id,
+ node_task_cnt);
+ }
+
+ debug("mpi/pmi2: processor mapping: %s", mapping);
+ return mapping;
+}
+
+static int
+_setup_srun_job_info(const mpi_plugin_client_info_t *job)
+{
+ char *p;
+ void *handle = NULL, *sym = NULL;
+
+ memset(&job_info, 0, sizeof(job_info));
+
+ job_info.jobid = job->jobid;
+ job_info.stepid = job->stepid;
+ job_info.nnodes = job->step_layout->node_cnt;
+ job_info.nodeid = -1; /* id in tree. not used. */
+ job_info.ntasks = job->step_layout->task_cnt;
+ job_info.ltasks = 0; /* not used */
+ job_info.gtids = NULL; /* not used */
+
+
+ p = getenv(PMI2_PMI_DEBUGGED_ENV);
+ if (p) {
+ job_info.pmi_debugged = atoi(p);
+ } else {
+ job_info.pmi_debugged = 0;
+ }
+ p = getenv(PMI2_SPAWN_SEQ_ENV);
+ if (p) { /* spawned */
+ job_info.spawn_seq = atoi(p);
+ p = getenv(PMI2_SPAWNER_JOBID_ENV);
+ job_info.spawner_jobid = xstrdup(p);
+ /* env unset in stepd */
+ } else {
+ job_info.spawn_seq = 0;
+ job_info.spawner_jobid = NULL;
+ }
+
+ job_info.step_nodelist = xstrdup(job->step_layout->node_list);
+ job_info.proc_mapping = _get_proc_mapping(job);
+ if (job_info.proc_mapping == NULL) {
+ return SLURM_ERROR;
+ }
+ p = getenv(PMI2_PMI_JOBID_ENV);
+ if (p) { /* spawned */
+ job_info.pmi_jobid = xstrdup(p);
+ } else {
+ xstrfmtcat(job_info.pmi_jobid, "%u.%u", job->jobid,
+ job->stepid);
+ }
+ job_info.job_env = env_array_copy((const char **)environ);
+
+ /* hjcao: this is really dirty.
+ But writing a new launcher is not desirable. */
+ handle = dlopen(NULL, RTLD_LAZY);
+ if (handle == NULL) {
+ error("mpi/pmi2: failed to dlopen()");
+ return SLURM_ERROR;
+ }
+ sym = dlsym(handle, "job");
+ if (sym == NULL) {
+ /* if called directly in API, there may be no symbol available */
+ verbose ("mpi/pmi2: failed to find symbol 'job'");
+ job_info.srun_job = NULL;
+ } else {
+ job_info.srun_job = *(srun_job_t **)sym;
+ }
+ sym = dlsym(handle, "opt");
+ if (sym == NULL) {
+ verbose("mpi/pmi2: failed to find symbol 'opt'");
+ job_info.srun_opt = NULL;
+ } else {
+ job_info.srun_opt = (opt_t *)sym;
+ }
+ dlclose(handle);
+
+ return SLURM_SUCCESS;
+}
+
+static int
+_setup_srun_tree_info(const mpi_plugin_client_info_t *job)
+{
+ char *p;
+ uint16_t p_port;
+
+ memset(&tree_info, 0, sizeof(tree_info));
+
+ tree_info.this_node = "launcher"; /* not used */
+ tree_info.parent_id = -2; /* not used */
+ tree_info.parent_node = NULL; /* not used */
+ tree_info.num_children = job_info.nnodes;
+ tree_info.depth = 0; /* not used */
+ tree_info.max_depth = 0; /* not used */
+ /* pmi_port set in _setup_srun_sockets */
+ p = getenv(PMI2_SPAWNER_PORT_ENV);
+ if (p) { /* spawned */
+ p_port = atoi(p);
+ tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t));
+ /* assume there is always a lo interface */
+ slurm_set_addr(tree_info.srun_addr, p_port, "127.0.0.1");
+ } else
+ tree_info.srun_addr = NULL;
+
+ snprintf(tree_sock_addr, 128, PMI2_SOCK_ADDR_FMT,
+ job->jobid, job->stepid);
+
+ return SLURM_SUCCESS;
+}
+
+static int
+_setup_srun_socket(const mpi_plugin_client_info_t *job)
+{
+ if (net_stream_listen(&tree_sock,
+ (short int *)&tree_info.pmi_port) < 0) {
+ error("mpi/pmi2: Failed to create tree socket");
+ return SLURM_ERROR;
+ }
+ debug("mpi/pmi2: srun pmi port: %hu", tree_info.pmi_port);
+
+ return SLURM_SUCCESS;
+}
+
+static int
+_setup_srun_kvs(const mpi_plugin_client_info_t *job)
+{
+ int rc;
+
+ rc = temp_kvs_init();
+ return rc;
+}
+
+static int
+_setup_srun_environ(const mpi_plugin_client_info_t *job, char ***env)
+{
+ /* ifhn will be set in SLURM_SRUN_COMM_HOST by slurmd */
+ env_array_overwrite_fmt(env, PMI2_SRUN_PORT_ENV, "%hu",
+ tree_info.pmi_port);
+ env_array_overwrite_fmt(env, PMI2_STEP_NODES_ENV, "%s",
+ job_info.step_nodelist);
+ env_array_overwrite_fmt(env, PMI2_PROC_MAPPING_ENV, "%s",
+ job_info.proc_mapping);
+ return SLURM_SUCCESS;
+}
+
+static void *
+_task_launch_detection(void *unused)
+{
+ spawn_resp_t *resp;
+ srun_job_state_t state;
+
+ if (job_info.srun_job) {
+ while (1) {
+ state = job_state(job_info.srun_job);
+ if (state >= SRUN_JOB_RUNNING) {
+ break;
+ }
+ usleep(1000*50);
+ }
+ } else {
+ /* take the tasks launched successfully */
+ state = SRUN_JOB_RUNNING;
+ }
+
+ /* send a resp to spawner srun */
+ resp = spawn_resp_new();
+ resp->seq = job_info.spawn_seq;
+ resp->jobid = xstrdup(job_info.pmi_jobid);
+ resp->error_cnt = 0; /* TODO */
+ if (state == SRUN_JOB_RUNNING) {
+ resp->rc = 0;
+ } else {
+ resp->rc = 1;
+ }
+ spawn_resp_send_to_srun(resp);
+ spawn_resp_free(resp);
+ return NULL;
+}
+
+static int
+_setup_srun_task_launch_detection(void)
+{
+ int retries = 0;
+ pthread_t tid;
+ pthread_attr_t attr;
+
+ pthread_attr_init(&attr);
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+ while ((errno = pthread_create(&tid, &attr,
+ &_task_launch_detection, NULL))) {
+ if (++retries > 5) {
+ error ("mpi/pmi2: pthread_create error %m");
+ slurm_attr_destroy(&attr);
+ return SLURM_ERROR;
+ }
+ sleep(1);
+ }
+ slurm_attr_destroy(&attr);
+ debug("mpi/pmi2: task launch detection thread (%lu) started",
+ (unsigned long) tid);
+
+ return SLURM_SUCCESS;
+}
+
+extern int
+pmi2_setup_srun(const mpi_plugin_client_info_t *job, char ***env)
+{
+ int rc;
+
+ run_in_stepd = false;
+
+ rc = _setup_srun_job_info(job);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ rc = _setup_srun_tree_info(job);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ rc = _setup_srun_socket(job);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ rc = _setup_srun_kvs(job);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ rc = _setup_srun_environ(job, env);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+
+ if (job_info.spawn_seq) {
+ rc = _setup_srun_task_launch_detection();
+ if (rc != SLURM_SUCCESS)
+ return rc;
+ }
+
+ return SLURM_SUCCESS;
+}
+
diff --git a/src/plugins/mpi/pmi2/setup.h b/src/plugins/mpi/pmi2/setup.h
new file mode 100644
index 0000000..2c28314
--- /dev/null
+++ b/src/plugins/mpi/pmi2/setup.h
@@ -0,0 +1,112 @@
+/*****************************************************************************\
+ ** setup.h - MPI/PMI2 plugin setup
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifndef _SETUP_H
+#define _SETUP_H
+
+#if HAVE_CONFIG_H
+# include "config.h"
+# if HAVE_INTTYPES_H
+# include <inttypes.h>
+# else
+# if HAVE_STDINT_H
+# include <stdint.h>
+# endif
+# endif /* HAVE_INTTYPES_H */
+#else /* !HAVE_CONFIG_H */
+# include <inttypes.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <slurm/slurm_errno.h>
+#include "src/common/slurm_xlator.h"
+#include "src/common/xstring.h"
+#include "src/common/pack.h"
+#include "src/common/mpi.h"
+
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+
+#include "src/srun/srun_job.h"
+#include "src/srun/opt.h"
+
+#include "tree.h"
+
+typedef struct pmi2_job_info {
+ uint32_t jobid; /* Current SLURM job id */
+ uint32_t stepid; /* Current step id (or NO_VAL) */
+ uint32_t nnodes; /* number of nodes in current job step */
+ uint32_t nodeid; /* relative position of this node in job */
+ uint32_t ntasks; /* total number of tasks in current job */
+ uint32_t ltasks; /* number of tasks on *this* (local) node */
+ uint32_t *gtids; /* global task ids of the tasks */
+ uint32_t spawn_seq; /* seq of spawn. 0 if not spawned */
+
+ int pmi_debugged; /* whether output verbose PMI messages */
+ char *step_nodelist; /* list of nodes in this job step */
+ char *proc_mapping; /* processor mapping */
+ char *pmi_jobid; /* PMI job id */
+ char *spawner_jobid; /* spawner pmi job id */
+ char **job_env; /* environment of job. use in stepd */
+
+ srun_job_t *srun_job; /* used only in srun */
+ opt_t *srun_opt; /* used only in srun */
+} pmi2_job_info_t;
+
+typedef struct pmi2_tree_info {
+ char *this_node; /* this nodename */
+ char *parent_node; /* parent nodename */
+ int parent_id; /* parent nodeid */
+ int num_children; /* number of children stepds */
+ int depth; /* depth in tree */
+ int max_depth; /* max depth of the tree */
+ uint16_t pmi_port; /* PMI2 comm port of this srun */
+ slurm_addr_t *srun_addr;/* PMI2 comm address parent srun */
+} pmi2_tree_info_t;
+
+
+extern pmi2_job_info_t job_info;
+extern pmi2_tree_info_t tree_info;
+extern char tree_sock_addr[];
+extern int tree_sock;
+extern int *task_socks;
+#define STEPD_PMI_SOCK(lrank) task_socks[lrank * 2]
+#define TASK_PMI_SOCK(lrank) task_socks[lrank * 2 + 1]
+
+extern bool in_stepd(void);
+extern int pmi2_setup_stepd(const slurmd_job_t *job, char ***env);
+extern int pmi2_setup_srun(const mpi_plugin_client_info_t *job, char ***env);
+
+#endif /* _SETUP_H */
diff --git a/src/plugins/mpi/pmi2/spawn.c b/src/plugins/mpi/pmi2/spawn.c
new file mode 100644
index 0000000..e080963
--- /dev/null
+++ b/src/plugins/mpi/pmi2/spawn.c
@@ -0,0 +1,695 @@
+/*****************************************************************************\
+ ** spawn.c - PMI job spawn handling
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "src/common/slurm_xlator.h"
+#include "src/common/xmalloc.h"
+#include "src/common/xstring.h"
+#include "src/common/list.h"
+#include "src/common/slurm_protocol_interface.h"
+
+#include "spawn.h"
+#include "setup.h"
+#include "tree.h"
+#include "pmi.h"
+
+static uint32_t spawn_seq = 1; /* 0 if not spawned */
+static pid_t *spawned_srun_pids = NULL;
+
+typedef struct pending_spawn_req {
+ uint32_t seq;
+ int fd;
+ int lrank;
+ char *from_node; /* for srun */
+ struct pending_spawn_req *next;
+} psr_t;
+
+static psr_t *psr_list = NULL;
+
+extern spawn_subcmd_t *
+spawn_subcmd_new(void)
+{
+ spawn_subcmd_t *subcmd;
+
+ subcmd = xmalloc(sizeof(spawn_subcmd_t));
+ return subcmd;
+}
+
+extern void
+spawn_subcmd_free(spawn_subcmd_t *subcmd)
+{
+ int i;
+
+ if (subcmd) {
+ xfree(subcmd->cmd);
+ if (subcmd->argv) {
+ for (i = 0; i < subcmd->argc; i ++) {
+ xfree(subcmd->argv[i]);
+ }
+ xfree(subcmd->argv);
+ }
+ if (subcmd->info_keys) {
+ for (i = 0; i < subcmd->info_cnt; i ++) {
+ xfree(subcmd->info_keys[i]);
+ }
+ xfree(subcmd->info_keys);
+ }
+ if (subcmd->info_vals) {
+ for (i = 0; i < subcmd->info_cnt; i ++) {
+ xfree(subcmd->info_vals[i]);
+ }
+ xfree(subcmd->info_vals);
+ }
+ xfree(subcmd);
+ }
+}
+
+extern spawn_req_t *
+spawn_req_new(void)
+{
+ spawn_req_t *req;
+
+ req = xmalloc(sizeof(spawn_req_t));
+ req->seq = 0;
+ req->from_node = xstrdup(tree_info.this_node);
+ return req;
+}
+
+extern void
+spawn_req_free(spawn_req_t *req)
+{
+ int i;
+
+ if (req) {
+ xfree(req->from_node);
+ if (req->pp_keys) {
+ for (i = 0; i < req->preput_cnt; i ++) {
+ xfree(req->pp_keys[i]);
+ }
+ xfree(req->pp_keys);
+ }
+ if (req->pp_vals) {
+ for (i = 0; i < req->preput_cnt; i ++) {
+ xfree(req->pp_vals[i]);
+ }
+ xfree(req->pp_vals);
+ }
+ if (req->subcmds) {
+ for (i = 0; i < req->subcmd_cnt; i ++) {
+ spawn_subcmd_free(req->subcmds[i]);
+ }
+ xfree(req->subcmds);
+ }
+ xfree(req);
+ }
+}
+
+extern void
+spawn_req_pack(spawn_req_t *req, Buf buf)
+{
+ int i, j;
+ spawn_subcmd_t *subcmd;
+ void *auth_cred;
+
+ auth_cred = g_slurm_auth_create(NULL, 2, NULL);
+ if (auth_cred == NULL) {
+ error("authentication: %s",
+ g_slurm_auth_errstr(g_slurm_auth_errno(NULL)) );
+ return;
+ }
+ (void) g_slurm_auth_pack(auth_cred, buf);
+ (void) g_slurm_auth_destroy(auth_cred);
+
+ pack32(req->seq, buf);
+ packstr(req->from_node, buf);
+ pack32(req->subcmd_cnt, buf);
+ pack32(req->preput_cnt, buf);
+ for (i = 0; i < req->preput_cnt; i ++) {
+ packstr(req->pp_keys[i], buf);
+ packstr(req->pp_vals[i], buf);
+ }
+ for (i = 0; i < req->subcmd_cnt; i ++) {
+ subcmd = req->subcmds[i];
+
+ packstr(subcmd->cmd, buf);
+ pack32(subcmd->max_procs, buf);
+ pack32(subcmd->argc, buf);
+ for (j = 0; j < subcmd->argc; j ++) {
+ packstr(subcmd->argv[j], buf);
+ }
+ pack32(subcmd->info_cnt, buf);
+ for (j = 0; j < subcmd->info_cnt; j ++) {
+ packstr(subcmd->info_keys[j], buf);
+ packstr(subcmd->info_vals[j], buf);
+ }
+ }
+}
+
+extern int
+spawn_req_unpack(spawn_req_t **req_ptr, Buf buf)
+{
+ spawn_req_t *req = NULL;
+ spawn_subcmd_t *subcmd = NULL;
+ uint32_t temp32;
+ int i, j;
+ void *auth_cred;
+ uid_t auth_uid, my_uid;
+
+ auth_cred = g_slurm_auth_unpack(buf);
+ if (auth_cred == NULL) {
+ error("authentication: %s",
+ g_slurm_auth_errstr(g_slurm_auth_errno(NULL)) );
+ return SLURM_ERROR;
+ }
+ auth_uid = g_slurm_auth_get_uid(auth_cred, NULL);
+ (void) g_slurm_auth_destroy(auth_cred);
+ my_uid = getuid();
+ if ((auth_uid != 0) && (auth_uid != my_uid)) {
+ error("mpi/pmi2: spawn request apparently from uid %u",
+ (uint32_t) auth_uid);
+ return SLURM_ERROR;
+ }
+
+ req = xmalloc(sizeof(spawn_req_t));
+
+ safe_unpack32(&req->seq, buf);
+ safe_unpackstr_xmalloc(&req->from_node, &temp32, buf);
+ safe_unpack32(&req->subcmd_cnt, buf);
+ /* subcmd_cnt must be greater than 0 */
+ req->subcmds = xmalloc(req->subcmd_cnt * sizeof(spawn_subcmd_t *));
+ safe_unpack32(&req->preput_cnt, buf);
+ if (req->preput_cnt > 0) {
+ req->pp_keys = xmalloc(req->preput_cnt * sizeof(char *));
+ req->pp_vals = xmalloc(req->preput_cnt * sizeof(char *));
+ for (i = 0; i < req->preput_cnt; i ++) {
+ safe_unpackstr_xmalloc(&req->pp_keys[i], &temp32, buf);
+ safe_unpackstr_xmalloc(&req->pp_vals[i], &temp32, buf);
+ }
+ }
+ for (i = 0; i < req->subcmd_cnt; i ++) {
+ req->subcmds[i] = spawn_subcmd_new();
+ subcmd = req->subcmds[i];
+
+ safe_unpackstr_xmalloc(&(subcmd->cmd), &temp32, buf);
+ safe_unpack32(&(subcmd->max_procs), buf);
+ safe_unpack32(&(subcmd->argc), buf);
+ if (subcmd->argc > 0) {
+ subcmd->argv = xmalloc(subcmd->argc * sizeof(char *));
+ for (j = 0; j < subcmd->argc; j ++) {
+ safe_unpackstr_xmalloc(&(subcmd->argv[j]),
+ &temp32, buf);
+ }
+ }
+ safe_unpack32(&(subcmd->info_cnt), buf);
+ if (subcmd->info_cnt > 0) {
+ subcmd->info_keys = xmalloc(subcmd->info_cnt *
+ sizeof(char *));
+ subcmd->info_vals = xmalloc(subcmd->info_cnt *
+ sizeof(char *));
+ for (j = 0; j < subcmd->info_cnt; j ++) {
+ safe_unpackstr_xmalloc(&(subcmd->info_keys[j]),
+ &temp32, buf);
+ safe_unpackstr_xmalloc(&(subcmd->info_vals[j]),
+ &temp32, buf);
+ }
+ }
+ }
+ *req_ptr = req;
+ return SLURM_SUCCESS;
+
+unpack_error:
+ spawn_req_free(req);
+ return SLURM_ERROR;
+}
+
+extern int
+spawn_req_send_to_srun(spawn_req_t *req, spawn_resp_t **resp_ptr)
+{
+ Buf req_buf = NULL, resp_buf = NULL;
+ int rc;
+ uint16_t cmd;
+
+ req_buf = init_buf(2048);
+ cmd = TREE_CMD_SPAWN;
+ pack16(cmd, req_buf);
+ spawn_req_pack(req, req_buf);
+ rc = tree_msg_to_srun_with_resp(get_buf_offset(req_buf),
+ get_buf_data(req_buf), &resp_buf);
+ free_buf(req_buf);
+
+ if (rc == SLURM_SUCCESS) {
+ rc = spawn_resp_unpack(resp_ptr, resp_buf);
+ free_buf(resp_buf);
+ }
+ return rc;
+}
+/**************************************************************/
+
+extern spawn_resp_t *
+spawn_resp_new(void)
+{
+ spawn_resp_t *resp;
+
+ resp = xmalloc(sizeof(spawn_resp_t));
+ return resp;
+}
+
+extern void
+spawn_resp_free(spawn_resp_t *resp)
+{
+ if (resp) {
+ xfree(resp->jobid);
+ xfree(resp->error_codes);
+ xfree(resp);
+ }
+}
+
+extern void
+spawn_resp_pack(spawn_resp_t *resp, Buf buf)
+{
+ int i;
+
+ pack32(resp->seq, buf);
+ pack32((uint32_t)resp->rc, buf);
+ packstr(resp->jobid, buf);
+ pack32(resp->error_cnt, buf);
+ for (i = 0; i < resp->error_cnt; i ++) {
+ pack32((uint32_t)resp->error_codes[i], buf);
+ }
+}
+
+extern int
+spawn_resp_unpack(spawn_resp_t **resp_ptr, Buf buf)
+{
+ spawn_resp_t *resp = NULL;
+ uint32_t temp32;
+ int i;
+
+ resp = xmalloc(sizeof(spawn_resp_t));
+
+ safe_unpack32(&resp->seq, buf);
+ safe_unpack32((uint32_t *)&resp->rc, buf);
+ safe_unpackstr_xmalloc(&resp->jobid, &temp32, buf);
+ safe_unpack32(&resp->error_cnt, buf);
+ if (resp->error_cnt > 0) {
+ resp->error_codes = xmalloc(resp->error_cnt * sizeof(int));
+ for (i = 0; i < resp->error_cnt; i ++) {
+ safe_unpack32((uint32_t *)&(resp->error_codes[i]), buf);
+ }
+ }
+ *resp_ptr = resp;
+ return SLURM_SUCCESS;
+
+unpack_error:
+ spawn_resp_free(resp);
+ return SLURM_ERROR;
+}
+
+extern int
+spawn_resp_send_to_stepd(spawn_resp_t *resp, char *node)
+{
+ Buf buf;
+ int rc;
+ uint16_t cmd;
+
+ buf = init_buf(1024);
+
+ cmd = TREE_CMD_SPAWN_RESP;
+ pack16(cmd, buf);
+ spawn_resp_pack(resp, buf);
+
+ rc = tree_msg_to_stepds(node, get_buf_offset(buf), get_buf_data(buf));
+ free_buf(buf);
+ return rc;
+}
+
+extern int
+spawn_resp_send_to_srun(spawn_resp_t *resp)
+{
+ Buf buf;
+ int rc;
+ uint16_t cmd;
+
+ buf = init_buf(1024);
+
+ cmd = TREE_CMD_SPAWN_RESP;
+ pack16(cmd, buf);
+ spawn_resp_pack(resp, buf);
+
+ rc = tree_msg_to_srun(get_buf_offset(buf), get_buf_data(buf));
+ free_buf(buf);
+ return rc;
+}
+
+extern int
+spawn_resp_send_to_fd(spawn_resp_t *resp, int fd)
+{
+ Buf buf;
+ int rc;
+
+ buf = init_buf(1024);
+
+ /* sync with spawn_req_send_to_srun */
+/* cmd = TREE_CMD_SPAWN_RESP; */
+/* pack16(cmd, buf); */
+ spawn_resp_pack(resp, buf);
+ rc = _slurm_msg_sendto(fd, get_buf_data(buf), get_buf_offset(buf),
+ SLURM_PROTOCOL_NO_SEND_RECV_FLAGS);
+ free_buf(buf);
+
+ return rc;
+}
+
+/**************************************************************/
+
+extern int
+spawn_psr_enqueue(uint32_t seq, int fd, int lrank, char *from_node)
+{
+ psr_t *psr;
+
+ psr = xmalloc(sizeof(psr_t));
+ psr->seq = seq;
+ psr->fd = fd;
+ psr->lrank = lrank;
+ psr->from_node = xstrdup(from_node);
+ psr->next = psr_list;
+ psr_list = psr;
+ return SLURM_SUCCESS;
+}
+
+extern int
+spawn_psr_dequeue(uint32_t seq, int *fd, int *lrank, char **from_node)
+{
+ psr_t *psr, **pprev;
+
+ pprev = &psr_list;
+ psr = *pprev;
+ while(psr != NULL) {
+ if (psr->seq != seq) {
+ pprev = &(psr->next);
+ psr = *pprev;
+ continue;
+ }
+ /* found. remove the psr. */
+ *fd = psr->fd;
+ *lrank = psr->lrank;
+ *from_node = psr->from_node; /* take over ownership */
+ *pprev = psr->next;
+ xfree(psr);
+ return SLURM_SUCCESS;
+ }
+ return SLURM_ERROR;
+}
+
+extern uint32_t
+spawn_seq_next(void)
+{
+ return spawn_seq ++;
+}
+
+static int
+_exec_srun_single(spawn_req_t *req, char **env)
+{
+ int argc, i, j;
+ char **argv = NULL;
+ spawn_subcmd_t *subcmd;
+
+ debug3("mpi/mpi2: in _exec_srun_single");
+ subcmd = req->subcmds[0];
+ argc = subcmd->argc + 7;
+ xrealloc(argv, (argc + 1) * sizeof(char *));
+
+ j = 0;
+ argv[j ++] = "srun";
+ argv[j ++] = "--mpi=pmi2";
+ xstrfmtcat(argv[j ++], "--ntasks=%d", subcmd->max_procs);
+ /* TODO: inherit options from srun_opt. */
+ for (i = 0; i < subcmd->info_cnt; i ++) {
+ if (0) {
+
+ } else if (! strcmp(subcmd->info_keys[i], "host")) {
+ xstrfmtcat(argv[j ++], "--nodelist=%s",
+ subcmd->info_vals[i]);
+
+ } else if (! strcmp(subcmd->info_keys[i], "arch")) {
+ error("mpi/pmi2: spawn info key 'arch' not supported");
+
+ } else if (! strcmp(subcmd->info_keys[i], "wdir")) {
+ xstrfmtcat(argv[j ++], "--chdir=%s",
+ subcmd->info_vals[i]);
+
+ } else if(! strcmp(subcmd->info_keys[i], "path")) {
+ env_array_overwrite_fmt(&env, "PATH", "%s",
+ subcmd->info_vals[i]);
+
+ } else if (! strcmp(subcmd->info_keys[i], "file")) {
+ error("mpi/pmi2: spawn info key 'file' not supported");
+
+ } else if (! strcmp(subcmd->info_keys[i], "soft")) {
+ error("mpi/pmi2: spawn info key 'soft' not supported");
+
+ } else {
+ error("mpi/pmi2: unknown spawn info key '%s' ignored",
+ subcmd->info_keys[i]);
+ }
+ }
+ argv[j ++] = subcmd->cmd;
+ for (i = 0; i < subcmd->argc; i ++) {
+ argv[j ++] = subcmd->argv[i];
+ }
+ argv[j ++] = NULL;
+
+ {
+ debug3("mpi/mpi2: to execve");
+ for (i = 0; i < j; i ++) {
+ debug3("mpi/pmi2: argv[%d]=%s", i, argv[i]);
+ }
+ }
+ execve(SLURM_PREFIX"/bin/srun", argv, env);
+ error("mpi/pmi2: failed to exec srun: %m");
+ return SLURM_ERROR;
+}
+
+static int
+_exec_srun_multiple(spawn_req_t *req, char **env)
+{
+ int argc, ntasks, i, j, spawn_cnt, fd;
+ char **argv = NULL, *multi_prog = NULL, *buf = NULL;
+ spawn_subcmd_t *subcmd = NULL;
+
+ debug3("mpi/pmi2: in _exec_srun_multiple");
+ /* create a tmp multi_prog file */
+ /* TODO: how to delete the file? */
+ multi_prog = tempnam(NULL, NULL);
+ fd = open(multi_prog, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ error("mpi/pmi2: failed to open multi-prog file %s: %m",
+ multi_prog);
+ return SLURM_ERROR;
+ }
+ ntasks = 0;
+ for (spawn_cnt = 0; spawn_cnt < req->subcmd_cnt; spawn_cnt ++) {
+ subcmd = req->subcmds[spawn_cnt];
+ /* TODO: write a wrapper program to handle the info */
+ if (subcmd->info_cnt > 0) {
+ error("mpi/pmi2: spawn info ignored");
+ }
+ if (subcmd->max_procs == 1) {
+ xstrfmtcat(buf, "%d %s", ntasks, subcmd->cmd);
+ } else {
+ xstrfmtcat(buf, "%d-%d %s", ntasks,
+ ntasks + subcmd->max_procs - 1, subcmd->cmd);
+ }
+ for (i = 0; i < subcmd->argc; i ++) {
+ xstrfmtcat(buf, " %s", subcmd->argv[i]);
+ }
+ xstrcat(buf, "\n");
+ ntasks += subcmd->max_procs;
+ }
+ safe_write(fd, buf, strlen(buf));
+ xfree(buf);
+ close(fd);
+
+ argc = 7;
+ xrealloc(argv, argc * sizeof(char *));
+
+ j = 0;
+ argv[j ++] = "srun";
+ argv[j ++] = "--mpi=pmi2";
+ xstrfmtcat(argv[j ++], "--ntasks=%d", ntasks);
+ argv[j ++] = "--multi-prog";
+ argv[j ++] = multi_prog;
+ argv[j ++] = NULL;
+
+ debug3("mpi/mpi2: to execve");
+
+ execve(SLURM_PREFIX"/bin/srun", argv, env);
+ error("mpi/pmi2: failed to exec srun: %m");
+ return SLURM_ERROR;
+rwfail:
+ error("mpi/pmi2: failed to generate multi-prog file");
+ return SLURM_ERROR;
+}
+
+static void
+_setup_exec_srun(spawn_req_t *req)
+{
+ char **env, env_key[32];
+ int i, rc;
+ spawn_resp_t *resp;
+
+ debug3("mpi/pmi2: in _setup_exec_srun");
+
+ /* setup environments */
+ env = env_array_copy((const char **)job_info.job_env);
+ /* TODO: unset some env-vars */
+
+ env_array_overwrite_fmt(&env, "SLURM_JOB_ID", "%u", job_info.jobid);
+ env_array_overwrite_fmt(&env, PMI2_SPAWNER_JOBID_ENV, "%s",
+ job_info.pmi_jobid);
+ env_array_overwrite_fmt(&env, PMI2_PMI_JOBID_ENV, "%s-%u",
+ job_info.pmi_jobid, req->seq);
+ env_array_overwrite_fmt(&env, PMI2_SPAWN_SEQ_ENV, "%u", req->seq);
+ env_array_overwrite_fmt(&env, PMI2_SPAWNER_PORT_ENV, "%hu",
+ tree_info.pmi_port);
+ /* preput kvs */
+ env_array_overwrite_fmt(&env, PMI2_PREPUT_CNT_ENV, "%d",
+ req->preput_cnt);
+ for (i = 0; i < req->preput_cnt; i ++) {
+ snprintf(env_key, 32, PMI2_PPKEY_ENV"%d", i);
+ env_array_overwrite_fmt(&env, env_key, "%s", req->pp_keys[i]);
+ snprintf(env_key, 32, PMI2_PPVAL_ENV"%d", i);
+ env_array_overwrite_fmt(&env, env_key, "%s", req->pp_vals[i]);
+ }
+
+ if (req->subcmd_cnt == 1) {
+ /* no return if success */
+ rc = _exec_srun_single(req, env);
+ } else {
+ /* no return if success */
+ rc = _exec_srun_multiple(req, env);
+ }
+
+ resp = spawn_resp_new();
+ resp->seq = req->seq;
+ xstrfmtcat(resp->jobid, "%s-%u", job_info.pmi_jobid, req->seq);
+ resp->error_cnt = 0;
+ resp->rc = rc;
+
+ /* fake a srun address */
+ tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t));
+ slurm_set_addr(tree_info.srun_addr, tree_info.pmi_port,
+ "127.0.0.1");
+ spawn_resp_send_to_srun(resp);
+ spawn_resp_free(resp);
+ exit(errno);
+}
+
+extern int
+spawn_job_do_spawn(spawn_req_t *req)
+{
+ pid_t child_pid;
+
+ child_pid = fork();
+ if (child_pid < 0) {
+ error("mpi/pmi2: failed to fork srun");
+ return SLURM_ERROR;
+ } else if (child_pid == 0) { /* child */
+ _setup_exec_srun(req);
+ } else {
+ /* always serially executed, spawn_seq == req->seq + 1 */
+ xrealloc(spawned_srun_pids, spawn_seq * sizeof(pid_t));
+ spawned_srun_pids[req->seq] = child_pid;
+ return SLURM_SUCCESS;
+ }
+ return SLURM_ERROR;
+}
+
+static int
+_wait_for_all(void)
+{
+ pid_t child;
+ int i, status, exited;
+
+ exited = 0;
+ for (i = 1; i < spawn_seq; i ++) { /* seq 0 not used */
+ if (! spawned_srun_pids[i])
+ continue;
+ child = waitpid(spawned_srun_pids[i], &status, WNOHANG);
+ if (child == spawned_srun_pids[i]) {
+ spawned_srun_pids[i] = 0;
+ exited ++;
+ }
+ }
+ return exited;
+}
+
+extern void
+spawn_job_wait(void)
+{
+ int exited, i, wait;
+
+ if (job_info.srun_opt) {
+ wait = job_info.srun_opt->max_wait;
+ } else {
+ wait = 0;
+ }
+
+ if (wait == 0) /* TODO: wait indefinitely */
+ wait = 60;
+ exited = _wait_for_all();
+ while(wait > 0 && exited != spawn_seq - 1) {
+ sleep(1);
+ exited += _wait_for_all();
+ wait --;
+ }
+ for (i = 1; i < spawn_seq; i ++) {
+ if (!spawned_srun_pids[i])
+ continue;
+		/* terminate it */
+ kill(spawned_srun_pids[i], SIGTERM);
+ }
+}
diff --git a/src/plugins/mpi/pmi2/spawn.h b/src/plugins/mpi/pmi2/spawn.h
new file mode 100644
index 0000000..99da818
--- /dev/null
+++ b/src/plugins/mpi/pmi2/spawn.h
@@ -0,0 +1,112 @@
+/*****************************************************************************\
+ **  spawn.h - PMI job spawn handling
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifndef _SPAWN_H
+#define _SPAWN_H
+
+#if HAVE_CONFIG_H
+# include "config.h"
+# if HAVE_INTTYPES_H
+# include <inttypes.h>
+# else
+# if HAVE_STDINT_H
+# include <stdint.h>
+# endif
+# endif /* HAVE_INTTYPES_H */
+#else /* !HAVE_CONFIG_H */
+# include <inttypes.h>
+#endif /* HAVE_CONFIG_H */
+
+#include "src/common/pack.h"
+
+typedef struct spawn_subcmd {
+ char *cmd;
+ uint32_t max_procs;
+ uint32_t argc;
+ char **argv;
+ uint32_t info_cnt;
+ char **info_keys;
+ char **info_vals;
+} spawn_subcmd_t;
+
+typedef struct spawn_req {
+ uint32_t seq;
+ char *from_node;
+ uint32_t subcmd_cnt;
+ uint32_t preput_cnt;
+ char **pp_keys;
+ char **pp_vals;
+ spawn_subcmd_t **subcmds;
+ /* TODO: SLURM specific job control info */
+} spawn_req_t;
+
+typedef struct spawn_resp {
+ uint32_t seq;
+ int rc;
+ char *jobid;
+ uint32_t error_cnt;
+ int *error_codes;
+} spawn_resp_t;
+
+extern spawn_subcmd_t *spawn_subcmd_new(void);
+extern void spawn_subcmd_free(spawn_subcmd_t *subcmd);
+extern spawn_req_t *spawn_req_new(void);
+extern void spawn_req_free(spawn_req_t *req);
+extern void spawn_req_pack(spawn_req_t *req, Buf buf);
+extern int spawn_req_unpack(spawn_req_t **req_ptr, Buf buf);
+extern int spawn_req_send_to_srun(spawn_req_t *req, spawn_resp_t **resp_ptr);
+
+extern spawn_resp_t *spawn_resp_new(void);
+extern void spawn_resp_free(spawn_resp_t *resp);
+extern void spawn_resp_pack(spawn_resp_t *resp, Buf buf);
+extern int spawn_resp_unpack(spawn_resp_t **resp_ptr, Buf buf);
+extern int spawn_resp_send_to_stepd(spawn_resp_t *resp, char *node);
+extern int spawn_resp_send_to_fd(spawn_resp_t *resp, int fd);
+extern int spawn_resp_send_to_srun(spawn_resp_t *resp);
+
+extern int spawn_psr_enqueue(uint32_t seq, int fd, int lrank,
+ char *from_node);
+extern int spawn_psr_dequeue(uint32_t seq, int *fd, int *lrank,
+ char **from_node);
+
+extern uint32_t spawn_seq_next(void);
+
+extern int spawn_job_do_spawn(spawn_req_t *req);
+extern void spawn_job_wait(void);
+
+
+#endif /* _SPAWN_H */
diff --git a/src/plugins/mpi/pmi2/tree.c b/src/plugins/mpi/pmi2/tree.c
new file mode 100644
index 0000000..a3cdefe
--- /dev/null
+++ b/src/plugins/mpi/pmi2/tree.c
@@ -0,0 +1,417 @@
+/*****************************************************************************\
+ ** tree.c - PMI tree communication handling code
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <unistd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include "src/common/slurm_xlator.h"
+#include "src/common/slurm_protocol_interface.h"
+#include "src/common/slurm_protocol_api.h"
+
+#include "kvs.h"
+#include "spawn.h"
+#include "client.h"
+#include "setup.h"
+#include "pmi.h"
+
+static int _handle_kvs_fence(int fd, Buf buf);
+static int _handle_kvs_fence_resp(int fd, Buf buf);
+static int _handle_spawn(int fd, Buf buf);
+static int _handle_spawn_resp(int fd, Buf buf);
+
+static int (*tree_cmd_handlers[]) (int fd, Buf buf) = {
+ _handle_kvs_fence,
+ _handle_kvs_fence_resp,
+ _handle_spawn,
+ _handle_spawn_resp,
+ NULL
+};
+
+static char *tree_cmd_names[] = {
+ "TREE_CMD_KVS_FENCE",
+ "TREE_CMD_KVS_FENCE_RESP",
+ "TREE_CMD_SPAWN",
+ "TREE_CMD_SPAWN_RESP",
+ NULL,
+};
+
+static int
+_handle_kvs_fence(int fd, Buf buf)
+{
+ uint32_t from_nodeid, num_children, temp32;
+ char *from_node = NULL;
+
+ safe_unpack32(&from_nodeid, buf);
+ safe_unpackstr_xmalloc(&from_node, &temp32, buf);
+ safe_unpack32(&num_children, buf);
+
+ debug3("mpi/pmi2: in _handle_kvs_fence, from node %u(%s) representing"
+ " %u offspring", from_nodeid, from_node, num_children);
+
+ if (tasks_to_wait == 0 && children_to_wait == 0) {
+ tasks_to_wait = job_info.ltasks;
+ children_to_wait = tree_info.num_children;
+ }
+ children_to_wait -= num_children;
+
+ temp_kvs_merge(buf);
+
+ if (children_to_wait == 0 && tasks_to_wait == 0) {
+ temp_kvs_send();
+ }
+ debug3("mpi/pmi2: out _handle_kvs_fence, tasks_to_wait=%d, "
+ "children_to_wait=%d", tasks_to_wait, children_to_wait);
+ return SLURM_SUCCESS;
+
+unpack_error:
+ error("mpi/pmi2: failed to unpack kvs fence message");
+ return SLURM_ERROR;
+}
+
+static int
+_handle_kvs_fence_resp(int fd, Buf buf)
+{
+ char *key, *val;
+ int rc = 0, i = 0;
+ client_resp_t *resp;
+ uint32_t temp32;
+
+ debug3("mpi/pmi2: in _handle_kvs_fence_resp");
+ temp32 = remaining_buf(buf);
+ debug3("mpi/pmi2: buf length: %u", temp32);
+ /* put kvs into local hash */
+ while (remaining_buf(buf) > 0) {
+ safe_unpackstr_xmalloc(&key, &temp32, buf);
+ safe_unpackstr_xmalloc(&val, &temp32, buf);
+ kvs_put(key, val);
+ //temp32 = remaining_buf(buf);
+ xfree(key);
+ xfree(val);
+ }
+
+resp:
+ /* send fence_resp/barrier_out to tasks */
+ resp = client_resp_new();
+ if ( is_pmi11() ) {
+ client_resp_append(resp, CMD_KEY"="BARRIEROUT_CMD" "
+ RC_KEY"=%d\n", rc);
+ } else if (is_pmi20()) {
+ client_resp_append(resp, CMD_KEY"="KVSFENCERESP_CMD";"
+ RC_KEY"=%d;", rc);
+ }
+ for (i = 0; i < job_info.ltasks; i ++) {
+ client_resp_send(resp, STEPD_PMI_SOCK(i));
+ }
+ client_resp_free(resp);
+ return rc;
+
+unpack_error:
+ error("mpi/pmi2: unpack kvs error in fence resp");
+ rc = SLURM_ERROR;
+ goto resp;
+}
+
+/* only called in srun */
+static int
+_handle_spawn(int fd, Buf buf)
+{
+ int rc;
+ spawn_req_t *req = NULL;
+ spawn_resp_t *resp = NULL;
+
+ debug3("mpi/pmi2: in _handle_spawn");
+
+ rc = spawn_req_unpack(&req, buf);
+ if (rc != SLURM_SUCCESS) {
+ error("mpi/pmi2: failed to unpack spawn request spawn cmd");
+ /* We lack a hostname to send response below.
+ resp = spawn_resp_new();
+ resp->rc = rc;
+ rc = spawn_resp_send_to_stepd(resp, req->from_node);
+ spawn_resp_free(resp); */
+ return rc;
+ }
+
+ /* assign a sequence number */
+ req->seq = spawn_seq_next();
+ resp = spawn_resp_new();
+ resp->seq = req->seq;
+ resp->jobid = NULL;
+ resp->error_cnt = 0;
+
+ /* fork srun */
+ rc = spawn_job_do_spawn(req);
+ if (rc != SLURM_SUCCESS) {
+ error("mpi/pmi2: failed to spawn job");
+ resp->rc = rc;
+ } else {
+ spawn_psr_enqueue(resp->seq, -1, -1, req->from_node);
+ resp->rc = SLURM_SUCCESS; /* temp resp */
+ }
+
+ spawn_resp_send_to_fd(resp, fd);
+
+ spawn_req_free(req);
+ spawn_resp_free(resp);
+
+ debug3("mpi/pmi2: out _handle_spawn");
+ return rc;
+}
+
+static int
+_send_task_spawn_resp_pmi20(spawn_resp_t *spawn_resp, int task_fd,
+ int task_lrank)
+{
+ int i, rc;
+ client_resp_t *task_resp;
+ char *error_codes = NULL;
+
+ task_resp = client_resp_new();
+ client_resp_append(task_resp,
+ CMD_KEY"="SPAWNRESP_CMD";"
+ RC_KEY"=%d;"
+ JOBID_KEY"=%s;",
+ spawn_resp->rc,
+ spawn_resp->jobid);
+ /* seems that simple2pmi does not consider rc */
+ if (spawn_resp->rc != SLURM_SUCCESS) {
+ xstrfmtcat(error_codes, "%d", spawn_resp->rc);
+ }
+ if (spawn_resp->error_cnt > 0) {
+ if (error_codes) {
+ xstrfmtcat(error_codes, ",%d", spawn_resp->error_codes[0]);
+ } else {
+ xstrfmtcat(error_codes, "%d", spawn_resp->error_codes[0]);
+ }
+
+ for (i = 1; i < spawn_resp->error_cnt; i ++) {
+ xstrfmtcat(error_codes, ",%d",
+ spawn_resp->error_codes[i]);
+ }
+ }
+ if (error_codes) {
+ client_resp_append(task_resp, ERRCODES_KEY"=%s;",
+ error_codes);
+ xfree(error_codes);
+ }
+
+ rc = client_resp_send(task_resp, task_fd);
+ client_resp_free(task_resp);
+ return rc;
+}
+
+static int
+_send_task_spawn_resp_pmi11(spawn_resp_t *spawn_resp, int task_fd,
+ int task_lrank)
+{
+ int i, rc;
+ client_resp_t *task_resp;
+ char *error_codes = NULL;
+
+ task_resp = client_resp_new();
+ client_resp_append(task_resp,
+ CMD_KEY"="SPAWNRESULT_CMD" "
+ RC_KEY"=%d "
+ JOBID_KEY"=%s", /* JOBID_KEY is not required */
+ spawn_resp->rc,
+ spawn_resp->jobid);
+
+ if (spawn_resp->rc != SLURM_SUCCESS) {
+ xstrfmtcat(error_codes, "%d", spawn_resp->rc);
+ }
+ if (spawn_resp->error_cnt > 0) {
+ if (error_codes) {
+ xstrfmtcat(error_codes, ",%d", spawn_resp->error_codes[0]);
+ } else {
+ xstrfmtcat(error_codes, "%d", spawn_resp->error_codes[0]);
+ }
+
+ for (i = 1; i < spawn_resp->error_cnt; i ++) {
+ xstrfmtcat(error_codes, ",%d",
+ spawn_resp->error_codes[i]);
+ }
+ }
+ if (error_codes) {
+ client_resp_append(task_resp, " "ERRCODES_KEY"=%s\n",
+ error_codes);
+ xfree(error_codes);
+ } else {
+ client_resp_append(task_resp, "\n");
+ }
+
+ rc = client_resp_send(task_resp, task_fd);
+ client_resp_free(task_resp);
+ return rc;
+}
+
+/* called in stepd and srun */
+static int
+_handle_spawn_resp(int fd, Buf buf)
+{
+ int rc, task_fd, task_lrank;
+ spawn_resp_t *spawn_resp;
+ char *from_node = NULL;
+
+ debug3("mpi/pmi2: in _handle_spawn_resp");
+
+ rc = spawn_resp_unpack(&spawn_resp, buf);
+ if (rc != SLURM_SUCCESS) {
+ error("mpi/pmi2: failed to unpack spawn response tree cmd");
+ return SLURM_ERROR;
+ }
+
+ rc = spawn_psr_dequeue(spawn_resp->seq, &task_fd, &task_lrank, &from_node);
+ if (rc != SLURM_SUCCESS) {
+ error("mpi/pmi2: spawn response not matched in psr list");
+ return SLURM_ERROR;
+ }
+
+ if (from_node == NULL) { /* stepd */
+ debug3("mpi/pmi2: spawned tasks of %s launched",
+ spawn_resp->jobid);
+ if (is_pmi20()) {
+ _send_task_spawn_resp_pmi20(spawn_resp, task_fd, task_lrank);
+ } else if (is_pmi11()) {
+ _send_task_spawn_resp_pmi11(spawn_resp, task_fd, task_lrank);
+ }
+ } else { /* srun */
+ debug3("mpi/pmi2: spawned tasks of %s launched",
+ spawn_resp->jobid);
+ /* forward resp to stepd */
+ spawn_resp_send_to_stepd(spawn_resp, from_node);
+ xfree(from_node);
+ }
+ spawn_resp_free(spawn_resp);
+
+ return rc;
+}
+
+/**************************************************************/
+extern int
+handle_tree_cmd(int fd)
+{
+ char *req_buf = NULL;
+ uint32_t len;
+ Buf buf = NULL;
+ uint16_t cmd;
+ int rc;
+
+ debug3("mpi/pmi2: in handle_tree_cmd");
+
+ safe_read(fd, &len, sizeof(uint32_t));
+ len = ntohl(len);
+
+ safe_read(fd, &cmd, sizeof(uint16_t));
+ cmd = ntohs(cmd);
+ if (cmd >= TREE_CMD_COUNT) {
+ error("mpi/pmi2: invalid tree req command");
+ return SLURM_ERROR;
+ }
+
+ len -= sizeof(cmd);
+ req_buf = xmalloc(len + 1);
+ safe_read(fd, req_buf, len);
+ buf = create_buf(req_buf, len); /* req_buf taken by buf */
+
+ debug3("mpi/pmi2: got tree cmd: %hu(%s)", cmd, tree_cmd_names[cmd]);
+ rc = tree_cmd_handlers[cmd](fd, buf);
+ free_buf (buf);
+ debug3("mpi/pmi2: out handle_tree_cmd");
+ return rc;
+
+rwfail:
+ xfree(req_buf);
+ return SLURM_ERROR;
+}
+
+extern int
+tree_msg_to_srun(uint32_t len, char *msg)
+{
+ int fd, rc;
+
+ fd = _slurm_open_stream(tree_info.srun_addr, true);
+ rc = _slurm_msg_sendto(fd, msg, len, SLURM_PROTOCOL_NO_SEND_RECV_FLAGS);
+ close(fd);
+ return rc;
+}
+
+extern int
+tree_msg_to_srun_with_resp(uint32_t len, char *msg, Buf *resp_ptr)
+{
+ int fd, rc;
+ Buf buf = NULL;
+ char *data = NULL;
+
+ xassert(resp_ptr != NULL);
+
+ fd = _slurm_open_stream(tree_info.srun_addr, true);
+ rc = _slurm_msg_sendto(fd, msg, len, SLURM_PROTOCOL_NO_SEND_RECV_FLAGS);
+ if (rc == len) { /* all data sent */
+ safe_read(fd, &len, sizeof(len));
+ len = ntohl(len);
+ data = xmalloc(len);
+ safe_read(fd, data, len);
+ buf = create_buf(data, len);
+ *resp_ptr = buf;
+ rc = SLURM_SUCCESS;
+ } else {
+ rc = SLURM_ERROR;
+ }
+ close(fd);
+ return rc;
+
+rwfail:
+ close (fd);
+ xfree(data);
+ return SLURM_ERROR;
+}
+
+extern int
+tree_msg_to_stepds(char *nodelist, uint32_t len, char *msg)
+{
+ int rc;
+ rc = slurm_forward_data(nodelist,
+ tree_sock_addr,
+ len,
+ msg);
+ return rc;
+}
+
diff --git a/src/plugins/mpi/pmi2/tree.h b/src/plugins/mpi/pmi2/tree.h
new file mode 100644
index 0000000..113c3ec
--- /dev/null
+++ b/src/plugins/mpi/pmi2/tree.h
@@ -0,0 +1,62 @@
+/*****************************************************************************\
+ ** tree.h - PMI tree communication handling code
+ *****************************************************************************
+ * Copyright (C) 2011-2012 National University of Defense Technology.
+ * Written by Hongjia Cao <hjcao@nudt.edu.cn>.
+ * All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <https://computing.llnl.gov/linux/slurm/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifndef _TREE_H
+#define _TREE_H
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+enum {
+ TREE_CMD_KVS_FENCE,
+ TREE_CMD_KVS_FENCE_RESP,
+ TREE_CMD_SPAWN,
+ TREE_CMD_SPAWN_RESP,
+ TREE_CMD_COUNT
+};
+
+
+extern int handle_tree_cmd(int fd);
+extern int tree_msg_to_srun(uint32_t len, char *msg);
+extern int tree_msg_to_srun_with_resp(uint32_t len, char *msg, Buf *resp_ptr);
+extern int tree_msg_to_stepds(char *nodelist, uint32_t len, char *msg);
+
+
+
+
+#endif /* _TREE_H */
diff --git a/src/plugins/preempt/Makefile.in b/src/plugins/preempt/Makefile.in
index 0d13742..804dc1c 100644
--- a/src/plugins/preempt/Makefile.in
+++ b/src/plugins/preempt/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/preempt/none/Makefile.in b/src/plugins/preempt/none/Makefile.in
index cd293b0..17f128e 100644
--- a/src/plugins/preempt/none/Makefile.in
+++ b/src/plugins/preempt/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
preempt_none_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -402,7 +410,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-preempt_none.la: $(preempt_none_la_OBJECTS) $(preempt_none_la_DEPENDENCIES)
+preempt_none.la: $(preempt_none_la_OBJECTS) $(preempt_none_la_DEPENDENCIES) $(EXTRA_preempt_none_la_DEPENDENCIES)
$(preempt_none_la_LINK) -rpath $(pkglibdir) $(preempt_none_la_OBJECTS) $(preempt_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -539,10 +547,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/preempt/partition_prio/Makefile.in b/src/plugins/preempt/partition_prio/Makefile.in
index ec86816..10ad4a5 100644
--- a/src/plugins/preempt/partition_prio/Makefile.in
+++ b/src/plugins/preempt/partition_prio/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
preempt_partition_prio_la_LIBADD =
@@ -210,6 +216,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -246,6 +253,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-preempt_partition_prio.la: $(preempt_partition_prio_la_OBJECTS) $(preempt_partition_prio_la_DEPENDENCIES)
+preempt_partition_prio.la: $(preempt_partition_prio_la_OBJECTS) $(preempt_partition_prio_la_DEPENDENCIES) $(EXTRA_preempt_partition_prio_la_DEPENDENCIES)
$(preempt_partition_prio_la_LINK) -rpath $(pkglibdir) $(preempt_partition_prio_la_OBJECTS) $(preempt_partition_prio_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/preempt/qos/Makefile.in b/src/plugins/preempt/qos/Makefile.in
index caec6e9..073372c 100644
--- a/src/plugins/preempt/qos/Makefile.in
+++ b/src/plugins/preempt/qos/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
preempt_qos_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -402,7 +410,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-preempt_qos.la: $(preempt_qos_la_OBJECTS) $(preempt_qos_la_DEPENDENCIES)
+preempt_qos.la: $(preempt_qos_la_OBJECTS) $(preempt_qos_la_DEPENDENCIES) $(EXTRA_preempt_qos_la_DEPENDENCIES)
$(preempt_qos_la_LINK) -rpath $(pkglibdir) $(preempt_qos_la_OBJECTS) $(preempt_qos_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -539,10 +547,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/priority/Makefile.in b/src/plugins/priority/Makefile.in
index c77169b..7da0211 100644
--- a/src/plugins/priority/Makefile.in
+++ b/src/plugins/priority/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -201,6 +201,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -237,6 +238,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -573,10 +575,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/priority/basic/Makefile.in b/src/plugins/priority/basic/Makefile.in
index c3fe39d..f550c60 100644
--- a/src/plugins/priority/basic/Makefile.in
+++ b/src/plugins/priority/basic/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
priority_basic_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-priority_basic.la: $(priority_basic_la_OBJECTS) $(priority_basic_la_DEPENDENCIES)
+priority_basic.la: $(priority_basic_la_OBJECTS) $(priority_basic_la_DEPENDENCIES) $(EXTRA_priority_basic_la_DEPENDENCIES)
$(priority_basic_la_LINK) -rpath $(pkglibdir) $(priority_basic_la_OBJECTS) $(priority_basic_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/priority/basic/priority_basic.c b/src/plugins/priority/basic/priority_basic.c
index f82d740..e9a52ad 100644
--- a/src/plugins/priority/basic/priority_basic.c
+++ b/src/plugins/priority/basic/priority_basic.c
@@ -119,6 +119,7 @@
if (job_ptr->details)
new_prio -= (job_ptr->details->nice - NICE_OFFSET);
+ /* System hold is priority 0 */
if (new_prio < 1)
new_prio = 1;
@@ -155,7 +156,7 @@
}
extern List priority_p_get_priority_factors_list(
- priority_factors_request_msg_t *req_msg)
+ priority_factors_request_msg_t *req_msg, uid_t uid)
{
return(list_create(NULL));
}
diff --git a/src/plugins/priority/multifactor/Makefile.in b/src/plugins/priority/multifactor/Makefile.in
index 01a5b95..0977c61 100644
--- a/src/plugins/priority/multifactor/Makefile.in
+++ b/src/plugins/priority/multifactor/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
priority_multifactor_la_DEPENDENCIES =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -406,7 +414,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-priority_multifactor.la: $(priority_multifactor_la_OBJECTS) $(priority_multifactor_la_DEPENDENCIES)
+priority_multifactor.la: $(priority_multifactor_la_OBJECTS) $(priority_multifactor_la_DEPENDENCIES) $(EXTRA_priority_multifactor_la_DEPENDENCIES)
$(priority_multifactor_la_LINK) -rpath $(pkglibdir) $(priority_multifactor_la_OBJECTS) $(priority_multifactor_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -543,10 +551,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c
index 6c5b2bb..e66e48b 100644
--- a/src/plugins/priority/multifactor/priority_multifactor.c
+++ b/src/plugins/priority/multifactor/priority_multifactor.c
@@ -111,8 +111,8 @@
* minimum version for their plugins as the job completion logging API
* matures.
*/
-const char plugin_name[] = "Priority MULTIFACTOR plugin";
-const char plugin_type[] = "priority/multifactor";
+const char plugin_name[] = "Priority MULTIFACTOR plugin";
+const char plugin_type[] = "priority/multifactor";
const uint32_t plugin_version = 100;
static pthread_t decay_handler_thread;
@@ -128,6 +128,7 @@
static uint32_t weight_js; /* weight for Job Size factor */
static uint32_t weight_part; /* weight for Partition factor */
static uint32_t weight_qos; /* weight for QOS factor */
+static uint32_t flags; /* Priority Flags */
extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc);
extern double priority_p_calc_fs_factor(long double usage_efctv,
@@ -156,10 +157,11 @@
else if (!calc_fairshare)
return SLURM_SUCCESS;
+ assoc_mgr_lock(&locks);
+
xassert(assoc_mgr_association_list);
xassert(assoc_mgr_qos_list);
- assoc_mgr_lock(&locks);
itr = list_iterator_create(assoc_mgr_association_list);
/* We want to do this to all associations including
root. All usage_raws are calculated from the bottom up.
@@ -186,7 +188,7 @@
* This should be called every PriorityUsageResetPeriod
* RET: SLURM_SUCCESS on SUCCESS, SLURM_ERROR else.
*/
-static int _reset_usage()
+static int _reset_usage(void)
{
ListIterator itr = NULL;
slurmdb_association_rec_t *assoc = NULL;
@@ -197,9 +199,10 @@
if (!calc_fairshare)
return SLURM_SUCCESS;
+ assoc_mgr_lock(&locks);
+
xassert(assoc_mgr_association_list);
- assoc_mgr_lock(&locks);
itr = list_iterator_create(assoc_mgr_association_list);
/* We want to do this to all associations including
root. All usage_raws are calculated from the bottom up.
@@ -458,12 +461,31 @@
qos_ptr = (slurmdb_qos_rec_t *)job_ptr->qos_ptr;
if (weight_age) {
- uint32_t diff = start_time - job_ptr->details->begin_time;
+ uint32_t diff = 0;
+ time_t use_time;
+
+ if (flags & PRIORITY_FLAGS_ACCRUE_ALWAYS)
+ use_time = job_ptr->details->submit_time;
+ else
+ use_time = job_ptr->details->begin_time;
+
+ /* Only really add an age priority if the use_time is
+ past the start_time.
+ */
+ if (start_time > use_time)
+ diff = start_time - use_time;
+
if (job_ptr->details->begin_time) {
- if (diff < max_age)
+ if (diff < max_age) {
job_ptr->prio_factors->priority_age =
(double)diff / (double)max_age;
- else
+ } else
+ job_ptr->prio_factors->priority_age = 1.0;
+ } else if (flags & PRIORITY_FLAGS_ACCRUE_ALWAYS) {
+ if (diff < max_age) {
+ job_ptr->prio_factors->priority_age =
+ (double)diff / (double)max_age;
+ } else
job_ptr->prio_factors->priority_age = 1.0;
}
}
@@ -533,7 +555,7 @@
double priority = 0.0;
priority_factors_object_t pre_factors;
- if (job_ptr->direct_set_prio && (job_ptr->priority > 1))
+ if (job_ptr->direct_set_prio && (job_ptr->priority > 0))
return job_ptr->priority;
if (!job_ptr->details) {
@@ -542,23 +564,17 @@
job_ptr->job_id);
return 0;
}
- /*
- * This means the job is not eligible yet
- */
- if (!job_ptr->details->begin_time
- || (job_ptr->details->begin_time > start_time))
- return 1;
/* figure out the priority */
_get_priority_factors(start_time, job_ptr);
memcpy(&pre_factors, job_ptr->prio_factors,
sizeof(priority_factors_object_t));
- job_ptr->prio_factors->priority_age *= (double)weight_age;
- job_ptr->prio_factors->priority_fs *= (double)weight_fs;
- job_ptr->prio_factors->priority_js *= (double)weight_js;
+ job_ptr->prio_factors->priority_age *= (double)weight_age;
+ job_ptr->prio_factors->priority_fs *= (double)weight_fs;
+ job_ptr->prio_factors->priority_js *= (double)weight_js;
job_ptr->prio_factors->priority_part *= (double)weight_part;
- job_ptr->prio_factors->priority_qos *= (double)weight_qos;
+ job_ptr->prio_factors->priority_qos *= (double)weight_qos;
priority = job_ptr->prio_factors->priority_age
+ job_ptr->prio_factors->priority_fs
@@ -567,12 +583,9 @@
+ job_ptr->prio_factors->priority_qos
- (double)(job_ptr->prio_factors->nice - NICE_OFFSET);
- /*
- * 0 means the job is held; 1 means system hold
- * so 2 is the lowest non-held priority
- */
- if (priority < 2)
- priority = 2;
+ /* Priority 0 is reserved for held jobs */
+ if (priority < 1)
+ priority = 1;
if (priority_debug) {
info("Weighted Age priority is %f * %u = %.2f",
@@ -699,6 +712,7 @@
if (itr == NULL)
fatal("list_iterator_create: malloc failure");
+ assoc_mgr_lock(&locks);
while ((job_ptr = list_next(itr))) {
if (priority_debug)
debug2("job: %u",job_ptr->job_id);
@@ -714,7 +728,6 @@
delta = job_ptr->total_cpus * (last_ran - job_ptr->start_time);
- assoc_mgr_lock(&locks);
qos = (slurmdb_qos_rec_t *) job_ptr->qos_ptr;
assoc = (slurmdb_association_rec_t *) job_ptr->assoc_ptr;
@@ -743,8 +756,8 @@
assoc->usage->grp_used_cpu_run_secs -= delta;
assoc = assoc->usage->parent_assoc_ptr;
}
- assoc_mgr_unlock(&locks);
}
+ assoc_mgr_unlock(&locks);
list_iterator_destroy(itr);
unlock_slurmctld(job_read_lock);
}
@@ -1039,12 +1052,10 @@
}
/*
- * This means the job is held, 0, or a system
- * hold, 1. Continue also if the job is not
- * pending. There is no reason to set the
- * priority if the job isn't pending.
+ * Priority 0 is reserved for held jobs. Also skip
+ * priority calculation for non-pending jobs.
*/
- if ((job_ptr->priority <= 1)
+ if ((job_ptr->priority == 0)
|| !IS_JOB_PENDING(job_ptr))
continue;
@@ -1140,6 +1151,7 @@
weight_js = slurm_get_priority_weight_job_size();
weight_part = slurm_get_priority_weight_partition();
weight_qos = slurm_get_priority_weight_qos();
+ flags = slurmctld_conf.priority_flags;
if (priority_debug) {
info("priority: Max Age is %u", max_age);
@@ -1148,6 +1160,7 @@
info("priority: Weight JobSize is %u", weight_js);
info("priority: Weight Part is %u", weight_part);
info("priority: Weight QOS is %u", weight_qos);
+ info("priority: Flags is %u", flags);
}
}
@@ -1342,7 +1355,7 @@
}
extern List priority_p_get_priority_factors_list(
- priority_factors_request_msg_t *req_msg)
+ priority_factors_request_msg_t *req_msg, uid_t uid)
{
List req_job_list;
List req_user_list;
@@ -1372,6 +1385,7 @@
*/
if (!IS_JOB_PENDING(job_ptr))
continue;
+
/*
* This means the job is not eligible yet
*/
@@ -1380,9 +1394,9 @@
continue;
/*
- * 0 means the job is held; 1 means system hold
+ * 0 means the job is held
*/
- if (job_ptr->priority <= 1)
+ if (job_ptr->priority == 0)
continue;
/*
@@ -1395,10 +1409,10 @@
continue;
if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS)
- && (job_ptr->user_id != req_msg->uid)
- && !validate_operator(req_msg->uid)
+ && (job_ptr->user_id != uid)
+ && !validate_operator(uid)
&& !assoc_mgr_is_user_acct_coord(
- acct_db_conn, req_msg->uid,
+ acct_db_conn, uid,
job_ptr->account))
continue;
diff --git a/src/plugins/proctrack/Makefile.in b/src/plugins/proctrack/Makefile.in
index 396c6b8..eaad5d9 100644
--- a/src/plugins/proctrack/Makefile.in
+++ b/src/plugins/proctrack/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -578,10 +580,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/proctrack/aix/Makefile.in b/src/plugins/proctrack/aix/Makefile.in
index 33ff405..724f6a8 100644
--- a/src/plugins/proctrack/aix/Makefile.in
+++ b/src/plugins/proctrack/aix/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
proctrack_aix_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -406,7 +414,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-proctrack_aix.la: $(proctrack_aix_la_OBJECTS) $(proctrack_aix_la_DEPENDENCIES)
+proctrack_aix.la: $(proctrack_aix_la_OBJECTS) $(proctrack_aix_la_DEPENDENCIES) $(EXTRA_proctrack_aix_la_DEPENDENCIES)
$(proctrack_aix_la_LINK) -rpath $(pkglibdir) $(proctrack_aix_la_OBJECTS) $(proctrack_aix_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -543,10 +551,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/proctrack/cgroup/Makefile.in b/src/plugins/proctrack/cgroup/Makefile.in
index 9222117..bb5daad 100644
--- a/src/plugins/proctrack/cgroup/Makefile.in
+++ b/src/plugins/proctrack/cgroup/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
proctrack_cgroup_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-proctrack_cgroup.la: $(proctrack_cgroup_la_OBJECTS) $(proctrack_cgroup_la_DEPENDENCIES)
+proctrack_cgroup.la: $(proctrack_cgroup_la_OBJECTS) $(proctrack_cgroup_la_DEPENDENCIES) $(EXTRA_proctrack_cgroup_la_DEPENDENCIES)
$(proctrack_cgroup_la_LINK) -rpath $(pkglibdir) $(proctrack_cgroup_la_OBJECTS) $(proctrack_cgroup_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/proctrack/cgroup/proctrack_cgroup.c b/src/plugins/proctrack/cgroup/proctrack_cgroup.c
index 5de335d..d7026f9 100644
--- a/src/plugins/proctrack/cgroup/proctrack_cgroup.c
+++ b/src/plugins/proctrack/cgroup/proctrack_cgroup.c
@@ -45,6 +45,11 @@
#include <inttypes.h>
#endif
+#if defined(__NetBSD__)
+#include <sys/types.h> /* for pid_t */
+#include <sys/signal.h> /* for SIGKILL */
+#endif
+
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
#include "src/common/log.h"
@@ -142,7 +147,8 @@
if (slurm_cgroup_conf.cgroup_automount) {
if (xcgroup_ns_mount(&freezer_ns)) {
error("unable to mount freezer cgroup"
- " namespace");
+ " namespace: %s",
+ slurm_strerror(errno));
return SLURM_ERROR;
}
info("cgroup namespace '%s' is now mounted", "freezer");
diff --git a/src/plugins/proctrack/linuxproc/Makefile.in b/src/plugins/proctrack/linuxproc/Makefile.in
index 7d27e9b..3b07d12 100644
--- a/src/plugins/proctrack/linuxproc/Makefile.in
+++ b/src/plugins/proctrack/linuxproc/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
proctrack_linuxproc_la_LIBADD =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -409,7 +417,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-proctrack_linuxproc.la: $(proctrack_linuxproc_la_OBJECTS) $(proctrack_linuxproc_la_DEPENDENCIES)
+proctrack_linuxproc.la: $(proctrack_linuxproc_la_OBJECTS) $(proctrack_linuxproc_la_DEPENDENCIES) $(EXTRA_proctrack_linuxproc_la_DEPENDENCIES)
$(proctrack_linuxproc_la_LINK) -rpath $(pkglibdir) $(proctrack_linuxproc_la_OBJECTS) $(proctrack_linuxproc_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -547,10 +555,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c b/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c
index 283851e..19c23c2 100644
--- a/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c
+++ b/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c
@@ -47,6 +47,10 @@
# include <inttypes.h>
#endif
+#if defined(__NetBSD__)
+#include <sys/types.h> /* for pid_t */
+#include <sys/signal.h> /* for SIGKILL */
+#endif
#include <sys/types.h>
#include "slurm/slurm.h"
diff --git a/src/plugins/proctrack/lua/Makefile.in b/src/plugins/proctrack/lua/Makefile.in
index d8ad09b..6b6e574 100644
--- a/src/plugins/proctrack/lua/Makefile.in
+++ b/src/plugins/proctrack/lua/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
proctrack_lua_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -403,7 +411,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-proctrack_lua.la: $(proctrack_lua_la_OBJECTS) $(proctrack_lua_la_DEPENDENCIES)
+proctrack_lua.la: $(proctrack_lua_la_OBJECTS) $(proctrack_lua_la_DEPENDENCIES) $(EXTRA_proctrack_lua_la_DEPENDENCIES)
$(proctrack_lua_la_LINK) -rpath $(pkglibdir) $(proctrack_lua_la_OBJECTS) $(proctrack_lua_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -540,10 +548,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/proctrack/pgid/Makefile.in b/src/plugins/proctrack/pgid/Makefile.in
index 1088284..b6fbe0c 100644
--- a/src/plugins/proctrack/pgid/Makefile.in
+++ b/src/plugins/proctrack/pgid/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
proctrack_pgid_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-proctrack_pgid.la: $(proctrack_pgid_la_OBJECTS) $(proctrack_pgid_la_DEPENDENCIES)
+proctrack_pgid.la: $(proctrack_pgid_la_OBJECTS) $(proctrack_pgid_la_DEPENDENCIES) $(EXTRA_proctrack_pgid_la_DEPENDENCIES)
$(proctrack_pgid_la_LINK) -rpath $(pkglibdir) $(proctrack_pgid_la_OBJECTS) $(proctrack_pgid_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/proctrack/rms/Makefile.in b/src/plugins/proctrack/rms/Makefile.in
index af17893..4e50eac 100644
--- a/src/plugins/proctrack/rms/Makefile.in
+++ b/src/plugins/proctrack/rms/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
proctrack_rms_la_DEPENDENCIES =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -405,7 +413,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-proctrack_rms.la: $(proctrack_rms_la_OBJECTS) $(proctrack_rms_la_DEPENDENCIES)
+proctrack_rms.la: $(proctrack_rms_la_OBJECTS) $(proctrack_rms_la_DEPENDENCIES) $(EXTRA_proctrack_rms_la_DEPENDENCIES)
$(proctrack_rms_la_LINK) $(am_proctrack_rms_la_rpath) $(proctrack_rms_la_OBJECTS) $(proctrack_rms_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -542,10 +550,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/proctrack/sgi_job/Makefile.in b/src/plugins/proctrack/sgi_job/Makefile.in
index 4d02cfc..d3f5b17 100644
--- a/src/plugins/proctrack/sgi_job/Makefile.in
+++ b/src/plugins/proctrack/sgi_job/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
proctrack_sgi_job_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -402,7 +410,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-proctrack_sgi_job.la: $(proctrack_sgi_job_la_OBJECTS) $(proctrack_sgi_job_la_DEPENDENCIES)
+proctrack_sgi_job.la: $(proctrack_sgi_job_la_OBJECTS) $(proctrack_sgi_job_la_DEPENDENCIES) $(EXTRA_proctrack_sgi_job_la_DEPENDENCIES)
$(proctrack_sgi_job_la_LINK) -rpath $(pkglibdir) $(proctrack_sgi_job_la_OBJECTS) $(proctrack_sgi_job_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -539,10 +547,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/sched/Makefile.in b/src/plugins/sched/Makefile.in
index a334121..ff52b73 100644
--- a/src/plugins/sched/Makefile.in
+++ b/src/plugins/sched/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/sched/backfill/Makefile.in b/src/plugins/sched/backfill/Makefile.in
index 6199ff3..db29b23 100644
--- a/src/plugins/sched/backfill/Makefile.in
+++ b/src/plugins/sched/backfill/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
sched_backfill_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -406,7 +414,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-sched_backfill.la: $(sched_backfill_la_OBJECTS) $(sched_backfill_la_DEPENDENCIES)
+sched_backfill.la: $(sched_backfill_la_OBJECTS) $(sched_backfill_la_DEPENDENCIES) $(EXTRA_sched_backfill_la_DEPENDENCIES)
$(sched_backfill_la_LINK) -rpath $(pkglibdir) $(sched_backfill_la_OBJECTS) $(sched_backfill_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -544,10 +552,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c
index 1db0239..7e1bc64 100644
--- a/src/plugins/sched/backfill/backfill.c
+++ b/src/plugins/sched/backfill/backfill.c
@@ -94,6 +94,12 @@
#define BACKFILL_WINDOW (24 * 60 * 60)
#endif
+/* Length of uid/njobs arrays used for limiting the number of jobs
+ * per user considered in each backfill iteration */
+#ifndef BF_MAX_USERS
+# define BF_MAX_USERS 1000
+#endif
+
#define SLURMCTLD_THREAD_LIMIT 5
typedef struct node_space_map {
@@ -102,7 +108,10 @@
bitstr_t *avail_bitmap;
int next; /* next record, by time, zero termination */
} node_space_map_t;
-int backfilled_jobs = 0;
+
+/* Diag statistics */
+extern diag_stats_t slurmctld_diag_stats;
+int bf_last_ints = 0;
/*********************** local variables *********************/
static bool stop_backfill = false;
@@ -115,6 +124,7 @@
static int backfill_resolution = BACKFILL_RESOLUTION;
static int backfill_window = BACKFILL_WINDOW;
static int max_backfill_job_cnt = 50;
+static int max_backfill_job_per_user = 0;
/*********************** local functions *********************/
static void _add_reservation(uint32_t start_time, uint32_t end_reserve,
@@ -369,12 +379,22 @@
fatal("Invalid backfill scheduler max_job_bf: %d",
max_backfill_job_cnt);
}
+ /* "bf_res=" is vestigial from version 2.3 and can be removed later.
+ * Only "bf_resolution=" is documented. */
if (sched_params && (tmp_ptr=strstr(sched_params, "bf_res=")))
backfill_resolution = atoi(tmp_ptr + 7);
+ if (sched_params && (tmp_ptr=strstr(sched_params, "bf_resolution=")))
+ backfill_resolution = atoi(tmp_ptr + 14);
if (backfill_resolution < 1) {
fatal("Invalid backfill scheduler resolution: %d",
backfill_resolution);
}
+ if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_user=")))
+ max_backfill_job_per_user = atoi(tmp_ptr + 16);
+ if (max_backfill_job_per_user < 0) {
+ fatal("Invalid backfill scheduler bf_max_job_user: %d",
+ max_backfill_job_per_user);
+ }
xfree(sched_params);
}
@@ -385,6 +405,32 @@
config_flag = true;
}
+static void _do_diag_stats(struct timeval *tv1, struct timeval *tv2)
+{
+ long delta_t;
+ long bf_interval_usecs = backfill_interval * 1000000;
+
+ delta_t = (tv2->tv_sec - tv1->tv_sec) * 1000000;
+ delta_t += tv2->tv_usec - tv1->tv_usec;
+
+ slurmctld_diag_stats.bf_cycle_counter++;
+ slurmctld_diag_stats.bf_cycle_sum += (delta_t -(bf_last_ints *
+ bf_interval_usecs));
+ slurmctld_diag_stats.bf_cycle_last = delta_t - (bf_last_ints *
+ bf_interval_usecs);
+ slurmctld_diag_stats.bf_depth_sum += slurmctld_diag_stats.bf_last_depth;
+ slurmctld_diag_stats.bf_depth_try_sum += slurmctld_diag_stats.
+ bf_last_depth_try;
+ if (slurmctld_diag_stats.bf_cycle_last >
+ slurmctld_diag_stats.bf_cycle_max) {
+ slurmctld_diag_stats.bf_cycle_max = slurmctld_diag_stats.
+ bf_cycle_last;
+ }
+
+ slurmctld_diag_stats.bf_active = 0;
+}
+
+
/* backfill_agent - detached thread periodically attempts to backfill jobs */
extern void *backfill_agent(void *args)
{
@@ -433,6 +479,7 @@
part_update = last_part_update;
unlock_slurmctld(all_locks);
+ bf_last_ints++;
_my_sleep(secs);
lock_slurmctld(all_locks);
@@ -461,9 +508,13 @@
bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
time_t now, sched_start, later_start, start_res;
node_space_map_t *node_space;
+ struct timeval bf_time1, bf_time2;
static int sched_timeout = 0;
int this_sched_timeout = 0, rc = 0;
int job_test_count = 0;
+ uint32_t *uid = NULL, nuser = 0;
+ uint16_t *njobs = NULL;
+ bool already_counted;
#ifdef HAVE_CRAY
/*
@@ -500,12 +551,23 @@
filter_root = true;
job_queue = build_job_queue(true);
- if (list_count(job_queue) <= 1) {
+ if (list_count(job_queue) == 0) {
debug("backfill: no jobs to backfill");
list_destroy(job_queue);
return 0;
}
+ gettimeofday(&bf_time1, NULL);
+
+ slurmctld_diag_stats.bf_queue_len = list_count(job_queue);
+ slurmctld_diag_stats.bf_queue_len_sum += slurmctld_diag_stats.
+ bf_queue_len;
+ slurmctld_diag_stats.bf_last_depth = 0;
+ slurmctld_diag_stats.bf_last_depth_try = 0;
+ slurmctld_diag_stats.bf_when_last_cycle = now;
+ bf_last_ints = 0;
+ slurmctld_diag_stats.bf_active = 1;
+
node_space = xmalloc(sizeof(node_space_map_t) *
(max_backfill_job_cnt + 3));
node_space[0].begin_time = sched_start;
@@ -516,6 +578,10 @@
if (debug_flags & DEBUG_FLAG_BACKFILL)
_dump_node_space_table(node_space);
+ if (max_backfill_job_per_user) {
+ uid = xmalloc(BF_MAX_USERS * sizeof(uint32_t));
+ njobs = xmalloc(BF_MAX_USERS * sizeof(uint16_t));
+ }
while ((job_queue_rec = (job_queue_rec_t *)
list_pop_bottom(job_queue, sort_job_queue2))) {
job_test_count++;
@@ -529,20 +595,43 @@
if (debug_flags & DEBUG_FLAG_BACKFILL)
info("backfill test for job %u", job_ptr->job_id);
- if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
- (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
- (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT) ||
- (job_ptr->state_reason == WAIT_QOS_JOB_LIMIT) ||
- (job_ptr->state_reason == WAIT_QOS_RESOURCE_LIMIT) ||
- (job_ptr->state_reason == WAIT_QOS_TIME_LIMIT) ||
- !acct_policy_job_runnable(job_ptr)) {
- debug2("backfill: job %u is not allowed to run now. "
- "Skipping it. State=%s. Reason=%s. Priority=%u",
- job_ptr->job_id,
- job_state_string(job_ptr->job_state),
- job_reason_string(job_ptr->state_reason),
- job_ptr->priority);
- continue;
+ slurmctld_diag_stats.bf_last_depth++;
+ already_counted = false;
+
+ if (max_backfill_job_per_user) {
+ for (j = 0; j < nuser; j++) {
+ if (job_ptr->user_id == uid[j]) {
+ njobs[j]++;
+ debug2("backfill: user %u: #jobs %u",
+ uid[j], njobs[j]);
+ break;
+ }
+ }
+ if (j == nuser) { /* user not found */
+ if (nuser < BF_MAX_USERS) {
+ uid[j] = job_ptr->user_id;
+ njobs[j] = 1;
+ nuser++;
+ } else {
+ error("backfill: too many users in "
+ "queue. Consider increasing "
+ "BF_MAX_USERS");
+ }
+ debug2("backfill: found new user %u. "
+ "Total #users now %u",
+ job_ptr->user_id, nuser);
+ } else {
+ if (njobs[j] > max_backfill_job_per_user) {
+ /* skip job */
+ debug("backfill: have already checked "
+ "%u jobs for user %u; skipping "
+ "job %u",
+ max_backfill_job_per_user,
+ job_ptr->user_id,
+ job_ptr->job_id);
+ continue;
+ }
+ }
}
if (((part_ptr->state_up & PARTITION_SCHED) == 0) ||
@@ -684,6 +773,12 @@
/* this is the time consuming operation */
debug2("backfill: entering _try_sched for job %u.",
job_ptr->job_id);
+
+ if (!already_counted) {
+ slurmctld_diag_stats.bf_last_depth_try++;
+ already_counted = true;
+ }
+
j = _try_sched(job_ptr, &avail_bitmap,
min_nodes, max_nodes, req_nodes);
debug2("backfill: finished _try_sched for job %u.",
@@ -772,6 +867,8 @@
if (debug_flags & DEBUG_FLAG_BACKFILL)
_dump_node_space_table(node_space);
}
+ xfree(uid);
+ xfree(njobs);
FREE_NULL_BITMAP(avail_bitmap);
FREE_NULL_BITMAP(resv_bitmap);
@@ -782,6 +879,8 @@
}
xfree(node_space);
list_destroy(job_queue);
+ gettimeofday(&bf_time2, NULL);
+ _do_diag_stats(&bf_time1, &bf_time2);
if (debug_flags & DEBUG_FLAG_BACKFILL) {
END_TIMER;
info("backfill: completed testing %d jobs, %s",
@@ -818,10 +917,11 @@
srun_allocate(job_ptr->job_id);
else if (job_ptr->details->prolog_running == 0)
launch_job(job_ptr);
- backfilled_jobs++;
+ slurmctld_diag_stats.backfilled_jobs++;
+ slurmctld_diag_stats.last_backfilled_jobs++;
if (debug_flags & DEBUG_FLAG_BACKFILL) {
- info("backfill: Jobs backfilled since boot: %d",
- backfilled_jobs);
+ info("backfill: Jobs backfilled since boot: %u",
+ slurmctld_diag_stats.backfilled_jobs);
}
} else if ((job_ptr->job_id != fail_jobid) &&
(rc != ESLURM_ACCOUNTING_POLICY)) {
diff --git a/src/plugins/sched/builtin/Makefile.in b/src/plugins/sched/builtin/Makefile.in
index 3ef85b4..7194c66 100644
--- a/src/plugins/sched/builtin/Makefile.in
+++ b/src/plugins/sched/builtin/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
sched_builtin_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -407,7 +415,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-sched_builtin.la: $(sched_builtin_la_OBJECTS) $(sched_builtin_la_DEPENDENCIES)
+sched_builtin.la: $(sched_builtin_la_OBJECTS) $(sched_builtin_la_DEPENDENCIES) $(EXTRA_sched_builtin_la_DEPENDENCIES)
$(sched_builtin_la_LINK) -rpath $(pkglibdir) $(sched_builtin_la_OBJECTS) $(sched_builtin_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -545,10 +553,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/sched/builtin/builtin.c b/src/plugins/sched/builtin/builtin.c
index 9ea5aa3..377a6f1 100644
--- a/src/plugins/sched/builtin/builtin.c
+++ b/src/plugins/sched/builtin/builtin.c
@@ -161,6 +161,8 @@
}
/* Determine minimum and maximum node counts */
+ /* On BlueGene systems don't adjust the min/max node limits
+ here. We are working on midplane values. */
min_nodes = MAX(job_ptr->details->min_nodes,
part_ptr->min_nodes);
diff --git a/src/plugins/sched/builtin/builtin_wrapper.c b/src/plugins/sched/builtin/builtin_wrapper.c
index 3dfe166..e9868e4 100644
--- a/src/plugins/sched/builtin/builtin_wrapper.c
+++ b/src/plugins/sched/builtin/builtin_wrapper.c
@@ -151,9 +151,6 @@
/**************************************************************************/
/* TAG( slurm_sched_plugin_job_is_pending ) */
-/* This entire implementation does nothing more than calculate the */
-/* expected start time for pending jobs. The logic is borrowed from */
-/* backfill.c */
/**************************************************************************/
void slurm_sched_plugin_job_is_pending( void )
{
diff --git a/src/plugins/sched/hold/Makefile.in b/src/plugins/sched/hold/Makefile.in
index cd4aeef..ee4eab2 100644
--- a/src/plugins/sched/hold/Makefile.in
+++ b/src/plugins/sched/hold/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
sched_hold_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -403,7 +411,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-sched_hold.la: $(sched_hold_la_OBJECTS) $(sched_hold_la_DEPENDENCIES)
+sched_hold.la: $(sched_hold_la_OBJECTS) $(sched_hold_la_DEPENDENCIES) $(EXTRA_sched_hold_la_DEPENDENCIES)
$(sched_hold_la_LINK) -rpath $(pkglibdir) $(sched_hold_la_OBJECTS) $(sched_hold_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -540,10 +548,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/sched/wiki/Makefile.in b/src/plugins/sched/wiki/Makefile.in
index 220f4e6..966e379 100644
--- a/src/plugins/sched/wiki/Makefile.in
+++ b/src/plugins/sched/wiki/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
sched_wiki_la_LIBADD =
@@ -210,6 +216,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -246,6 +253,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -419,7 +427,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-sched_wiki.la: $(sched_wiki_la_OBJECTS) $(sched_wiki_la_DEPENDENCIES)
+sched_wiki.la: $(sched_wiki_la_OBJECTS) $(sched_wiki_la_DEPENDENCIES) $(EXTRA_sched_wiki_la_DEPENDENCIES)
$(sched_wiki_la_LINK) -rpath $(pkglibdir) $(sched_wiki_la_OBJECTS) $(sched_wiki_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -566,10 +574,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/sched/wiki2/Makefile.in b/src/plugins/sched/wiki2/Makefile.in
index bbe7f11..7311b5c 100644
--- a/src/plugins/sched/wiki2/Makefile.in
+++ b/src/plugins/sched/wiki2/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
sched_wiki2_la_LIBADD =
@@ -213,6 +219,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -249,6 +256,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -430,7 +438,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-sched_wiki2.la: $(sched_wiki2_la_OBJECTS) $(sched_wiki2_la_DEPENDENCIES)
+sched_wiki2.la: $(sched_wiki2_la_OBJECTS) $(sched_wiki2_la_DEPENDENCIES) $(EXTRA_sched_wiki2_la_DEPENDENCIES)
$(sched_wiki2_la_LINK) -rpath $(pkglibdir) $(sched_wiki2_la_OBJECTS) $(sched_wiki2_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -585,10 +593,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/sched/wiki2/get_jobs.c b/src/plugins/sched/wiki2/get_jobs.c
index 3e004d5..d371735 100644
--- a/src/plugins/sched/wiki2/get_jobs.c
+++ b/src/plugins/sched/wiki2/get_jobs.c
@@ -347,6 +347,11 @@
xstrcat(buf, tmp);
}
+ if (job_ptr->resp_host) {
+ snprintf(tmp, sizeof(tmp),"SUBMITHOST=\"%s\";", job_ptr->resp_host);
+ xstrcat(buf, tmp);
+ }
+
if (job_ptr->wckey) {
if ((quote = strchr(job_ptr->wckey, (int) '\"'))) {
/* Moab does not like strings containing a quote */
diff --git a/src/plugins/select/Makefile.in b/src/plugins/select/Makefile.in
index 18729c7..3975d29 100644
--- a/src/plugins/select/Makefile.in
+++ b/src/plugins/select/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -576,10 +578,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/bluegene/Makefile.am b/src/plugins/select/bluegene/Makefile.am
index 21f928d..6266282 100644
--- a/src/plugins/select/bluegene/Makefile.am
+++ b/src/plugins/select/bluegene/Makefile.am
@@ -13,7 +13,8 @@
noinst_LTLIBRARIES = libba_common.la libconfigure_api.la
-convenience_libs = $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
+convenience_libs = $(top_builddir)/src/api/libslurmhelper.la \
+ -lpthread $(DL_LIBS)
# These are needed for pack/unpack of structures for cross-cluster stuff
select_bluegene_la_SOURCES = select_bluegene.c \
@@ -22,7 +23,7 @@
libba_common_la_SOURCES = ba_common.c ba_common.h
-libconfigure_api_la_SOURCES = configure_api.c
+libconfigure_api_la_SOURCES = configure_api.c configure_api.h
select_bluegene_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
select_bluegene_la_LIBADD = libba_common.la
@@ -57,16 +58,32 @@
pkglib_LTLIBRARIES += runjob_plugin.la
runjob_plugin_la_SOURCES = runjob_plugin.cc
- runjob_plugin_la_LDFLAGS = -export-dynamic $(SO_LDFLAGS) $(PLUGIN_FLAGS) \
- $(convenience_libs) -lpthread
-
+ runjob_plugin_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+ runjob_plugin_la_LIBADD = $(convenience_libs)
endif
-select_bluegene_la_SOURCES += bg_core.c bg_defined_block.c \
- bg_dynamic_block.c bg_job_place.c \
- bg_job_run.c bg_list_functions.c \
- bg_read_config.c bg_record_functions.c \
- bg_status.c
+select_bluegene_la_SOURCES += \
+ bg_core.c \
+ bg_core.h \
+ bg_defined_block.c \
+ bg_defined_block.h \
+ bg_dynamic_block.c \
+ bg_dynamic_block.h \
+ bg_enums.h \
+ bg_job_place.c \
+ bg_job_place.h \
+ bg_job_run.c \
+ bg_job_run.h \
+ bg_list_functions.c \
+ bg_list_functions.h \
+ bg_read_config.c \
+ bg_read_config.h \
+ bg_record_functions.c \
+ bg_record_functions.h \
+ bg_status.c \
+ bg_status.h \
+ bg_structs.h \
+ bridge_linker.h
select_bluegene_la_LIBADD += $(block_allocator_lib) $(BG_LDFLAGS)
diff --git a/src/plugins/select/bluegene/Makefile.in b/src/plugins/select/bluegene/Makefile.in
index 65f9ece..4d1dc42 100644
--- a/src/plugins/select/bluegene/Makefile.in
+++ b/src/plugins/select/bluegene/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -43,11 +43,28 @@
@BGL_LOADED_FALSE@@BG_L_P_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@am__append_3 = libsched_if.la
@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@am__append_4 = bl_bgq ba_bgq
@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@am__append_5 = runjob_plugin.la
-@BLUEGENE_LOADED_TRUE@am__append_6 = bg_core.c bg_defined_block.c \
-@BLUEGENE_LOADED_TRUE@ bg_dynamic_block.c bg_job_place.c \
-@BLUEGENE_LOADED_TRUE@ bg_job_run.c bg_list_functions.c \
-@BLUEGENE_LOADED_TRUE@ bg_read_config.c bg_record_functions.c \
-@BLUEGENE_LOADED_TRUE@ bg_status.c
+@BLUEGENE_LOADED_TRUE@am__append_6 = \
+@BLUEGENE_LOADED_TRUE@ bg_core.c \
+@BLUEGENE_LOADED_TRUE@ bg_core.h \
+@BLUEGENE_LOADED_TRUE@ bg_defined_block.c \
+@BLUEGENE_LOADED_TRUE@ bg_defined_block.h \
+@BLUEGENE_LOADED_TRUE@ bg_dynamic_block.c \
+@BLUEGENE_LOADED_TRUE@ bg_dynamic_block.h \
+@BLUEGENE_LOADED_TRUE@ bg_enums.h \
+@BLUEGENE_LOADED_TRUE@ bg_job_place.c \
+@BLUEGENE_LOADED_TRUE@ bg_job_place.h \
+@BLUEGENE_LOADED_TRUE@ bg_job_run.c \
+@BLUEGENE_LOADED_TRUE@ bg_job_run.h \
+@BLUEGENE_LOADED_TRUE@ bg_list_functions.c \
+@BLUEGENE_LOADED_TRUE@ bg_list_functions.h \
+@BLUEGENE_LOADED_TRUE@ bg_read_config.c \
+@BLUEGENE_LOADED_TRUE@ bg_read_config.h \
+@BLUEGENE_LOADED_TRUE@ bg_record_functions.c \
+@BLUEGENE_LOADED_TRUE@ bg_record_functions.h \
+@BLUEGENE_LOADED_TRUE@ bg_status.c \
+@BLUEGENE_LOADED_TRUE@ bg_status.h \
+@BLUEGENE_LOADED_TRUE@ bg_structs.h \
+@BLUEGENE_LOADED_TRUE@ bridge_linker.h
@BLUEGENE_LOADED_TRUE@am__append_7 = $(block_allocator_lib) $(BG_LDFLAGS)
@BLUEGENE_LOADED_TRUE@sbin_PROGRAMS = slurm_prolog$(EXEEXT) \
@@ -120,6 +137,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)" "$(DESTDIR)$(sbindir)"
LTLIBRARIES = $(noinst_LTLIBRARIES) $(pkglib_LTLIBRARIES)
libba_common_la_LIBADD =
@@ -146,7 +169,11 @@
$(libsched_if64_la_LDFLAGS) $(LDFLAGS) -o $@
@BGL_LOADED_TRUE@@BG_L_P_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@am_libsched_if64_la_rpath = -rpath \
@BGL_LOADED_TRUE@@BG_L_P_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@ $(pkglibdir)
-runjob_plugin_la_LIBADD =
+am__DEPENDENCIES_1 =
+am__DEPENDENCIES_2 = $(top_builddir)/src/api/libslurmhelper.la \
+ $(am__DEPENDENCIES_1)
+@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@runjob_plugin_la_DEPENDENCIES = \
+@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@ $(am__DEPENDENCIES_2)
am__runjob_plugin_la_SOURCES_DIST = runjob_plugin.cc
@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@am_runjob_plugin_la_OBJECTS = \
@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@ runjob_plugin.lo
@@ -156,16 +183,18 @@
$(CXXFLAGS) $(runjob_plugin_la_LDFLAGS) $(LDFLAGS) -o $@
@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@am_runjob_plugin_la_rpath = \
@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@ -rpath $(pkglibdir)
-am__DEPENDENCIES_1 =
-@BLUEGENE_LOADED_TRUE@am__DEPENDENCIES_2 = $(block_allocator_lib) \
+@BLUEGENE_LOADED_TRUE@am__DEPENDENCIES_3 = $(block_allocator_lib) \
@BLUEGENE_LOADED_TRUE@ $(am__DEPENDENCIES_1)
select_bluegene_la_DEPENDENCIES = libba_common.la \
- $(am__DEPENDENCIES_2)
+ $(am__DEPENDENCIES_3)
am__select_bluegene_la_SOURCES_DIST = select_bluegene.c bg_job_info.c \
bg_job_info.h bg_node_info.c bg_node_info.h bg_core.c \
- bg_defined_block.c bg_dynamic_block.c bg_job_place.c \
- bg_job_run.c bg_list_functions.c bg_read_config.c \
- bg_record_functions.c bg_status.c
+ bg_core.h bg_defined_block.c bg_defined_block.h \
+ bg_dynamic_block.c bg_dynamic_block.h bg_enums.h \
+ bg_job_place.c bg_job_place.h bg_job_run.c bg_job_run.h \
+ bg_list_functions.c bg_list_functions.h bg_read_config.c \
+ bg_read_config.h bg_record_functions.c bg_record_functions.h \
+ bg_status.c bg_status.h bg_structs.h bridge_linker.h
@BLUEGENE_LOADED_TRUE@am__objects_1 = bg_core.lo bg_defined_block.lo \
@BLUEGENE_LOADED_TRUE@ bg_dynamic_block.lo bg_job_place.lo \
@BLUEGENE_LOADED_TRUE@ bg_job_run.lo bg_list_functions.lo \
@@ -182,10 +211,8 @@
@BLUEGENE_LOADED_TRUE@am_slurm_epilog_OBJECTS = \
@BLUEGENE_LOADED_TRUE@ slurm_epilog.$(OBJEXT)
slurm_epilog_OBJECTS = $(am_slurm_epilog_OBJECTS)
-am__DEPENDENCIES_3 = $(top_builddir)/src/api/libslurm.o \
- $(am__DEPENDENCIES_1)
@BLUEGENE_LOADED_TRUE@slurm_epilog_DEPENDENCIES = \
-@BLUEGENE_LOADED_TRUE@ $(am__DEPENDENCIES_3)
+@BLUEGENE_LOADED_TRUE@ $(am__DEPENDENCIES_2)
slurm_epilog_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(slurm_epilog_LDFLAGS) $(LDFLAGS) -o $@
@@ -194,7 +221,7 @@
@BLUEGENE_LOADED_TRUE@ slurm_prolog.$(OBJEXT)
slurm_prolog_OBJECTS = $(am_slurm_prolog_OBJECTS)
@BLUEGENE_LOADED_TRUE@slurm_prolog_DEPENDENCIES = \
-@BLUEGENE_LOADED_TRUE@ $(am__DEPENDENCIES_3)
+@BLUEGENE_LOADED_TRUE@ $(am__DEPENDENCIES_2)
slurm_prolog_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(slurm_prolog_LDFLAGS) $(LDFLAGS) -o $@
@@ -351,6 +378,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -387,6 +415,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -482,13 +511,15 @@
pkglib_LTLIBRARIES = select_bluegene.la $(am__append_2) \
$(am__append_3) $(am__append_5)
noinst_LTLIBRARIES = libba_common.la libconfigure_api.la
-convenience_libs = $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
+convenience_libs = $(top_builddir)/src/api/libslurmhelper.la \
+ -lpthread $(DL_LIBS)
+
# These are needed for pack/unpack of structures for cross-cluster stuff
select_bluegene_la_SOURCES = select_bluegene.c bg_job_info.c \
bg_job_info.h bg_node_info.c bg_node_info.h $(am__append_6)
libba_common_la_SOURCES = ba_common.c ba_common.h
-libconfigure_api_la_SOURCES = configure_api.c
+libconfigure_api_la_SOURCES = configure_api.c configure_api.h
select_bluegene_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
select_bluegene_la_LIBADD = libba_common.la $(am__append_7)
@BLUEGENE_LOADED_TRUE@SUBDIRS = sfree $(am__append_1) $(am__append_4)
@@ -500,9 +531,8 @@
@BG_L_P_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@block_allocator_lib = ba/libblock_allocator.la
@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@nodist_EXTRA_select_bluegene_la_SOURCES = dummy.cxx
@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@runjob_plugin_la_SOURCES = runjob_plugin.cc
-@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@runjob_plugin_la_LDFLAGS = -export-dynamic $(SO_LDFLAGS) $(PLUGIN_FLAGS) \
-@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@ $(convenience_libs) -lpthread
-
+@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@runjob_plugin_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+@BGQ_LOADED_TRUE@@BLUEGENE_LOADED_TRUE@runjob_plugin_la_LIBADD = $(convenience_libs)
@BLUEGENE_LOADED_TRUE@slurm_prolog_LDADD = $(convenience_libs)
@BLUEGENE_LOADED_TRUE@slurm_prolog_SOURCES = slurm_prolog.c
@BLUEGENE_LOADED_TRUE@slurm_prolog_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
@@ -583,17 +613,17 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libba_common.la: $(libba_common_la_OBJECTS) $(libba_common_la_DEPENDENCIES)
+libba_common.la: $(libba_common_la_OBJECTS) $(libba_common_la_DEPENDENCIES) $(EXTRA_libba_common_la_DEPENDENCIES)
$(LINK) $(libba_common_la_OBJECTS) $(libba_common_la_LIBADD) $(LIBS)
-libconfigure_api.la: $(libconfigure_api_la_OBJECTS) $(libconfigure_api_la_DEPENDENCIES)
+libconfigure_api.la: $(libconfigure_api_la_OBJECTS) $(libconfigure_api_la_DEPENDENCIES) $(EXTRA_libconfigure_api_la_DEPENDENCIES)
$(LINK) $(libconfigure_api_la_OBJECTS) $(libconfigure_api_la_LIBADD) $(LIBS)
-libsched_if.la: $(libsched_if_la_OBJECTS) $(libsched_if_la_DEPENDENCIES)
+libsched_if.la: $(libsched_if_la_OBJECTS) $(libsched_if_la_DEPENDENCIES) $(EXTRA_libsched_if_la_DEPENDENCIES)
$(libsched_if_la_LINK) $(am_libsched_if_la_rpath) $(libsched_if_la_OBJECTS) $(libsched_if_la_LIBADD) $(LIBS)
-libsched_if64.la: $(libsched_if64_la_OBJECTS) $(libsched_if64_la_DEPENDENCIES)
+libsched_if64.la: $(libsched_if64_la_OBJECTS) $(libsched_if64_la_DEPENDENCIES) $(EXTRA_libsched_if64_la_DEPENDENCIES)
$(libsched_if64_la_LINK) $(am_libsched_if64_la_rpath) $(libsched_if64_la_OBJECTS) $(libsched_if64_la_LIBADD) $(LIBS)
-runjob_plugin.la: $(runjob_plugin_la_OBJECTS) $(runjob_plugin_la_DEPENDENCIES)
+runjob_plugin.la: $(runjob_plugin_la_OBJECTS) $(runjob_plugin_la_DEPENDENCIES) $(EXTRA_runjob_plugin_la_DEPENDENCIES)
$(runjob_plugin_la_LINK) $(am_runjob_plugin_la_rpath) $(runjob_plugin_la_OBJECTS) $(runjob_plugin_la_LIBADD) $(LIBS)
-select_bluegene.la: $(select_bluegene_la_OBJECTS) $(select_bluegene_la_DEPENDENCIES)
+select_bluegene.la: $(select_bluegene_la_OBJECTS) $(select_bluegene_la_DEPENDENCIES) $(EXTRA_select_bluegene_la_DEPENDENCIES)
$(select_bluegene_la_LINK) -rpath $(pkglibdir) $(select_bluegene_la_OBJECTS) $(select_bluegene_la_LIBADD) $(LIBS)
install-sbinPROGRAMS: $(sbin_PROGRAMS)
@$(NORMAL_INSTALL)
@@ -638,10 +668,10 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-slurm_epilog$(EXEEXT): $(slurm_epilog_OBJECTS) $(slurm_epilog_DEPENDENCIES)
+slurm_epilog$(EXEEXT): $(slurm_epilog_OBJECTS) $(slurm_epilog_DEPENDENCIES) $(EXTRA_slurm_epilog_DEPENDENCIES)
@rm -f slurm_epilog$(EXEEXT)
$(slurm_epilog_LINK) $(slurm_epilog_OBJECTS) $(slurm_epilog_LDADD) $(LIBS)
-slurm_prolog$(EXEEXT): $(slurm_prolog_OBJECTS) $(slurm_prolog_DEPENDENCIES)
+slurm_prolog$(EXEEXT): $(slurm_prolog_OBJECTS) $(slurm_prolog_DEPENDENCIES) $(EXTRA_slurm_prolog_DEPENDENCIES)
@rm -f slurm_prolog$(EXEEXT)
$(slurm_prolog_LINK) $(slurm_prolog_OBJECTS) $(slurm_prolog_LDADD) $(LIBS)
@@ -951,10 +981,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/bluegene/ba/Makefile.in b/src/plugins/select/bluegene/ba/Makefile.in
index 63c4451..ef73973 100644
--- a/src/plugins/select/bluegene/ba/Makefile.in
+++ b/src/plugins/select/bluegene/ba/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -198,6 +198,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -234,6 +235,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -383,7 +385,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libblock_allocator.la: $(libblock_allocator_la_OBJECTS) $(libblock_allocator_la_DEPENDENCIES)
+libblock_allocator.la: $(libblock_allocator_la_OBJECTS) $(libblock_allocator_la_DEPENDENCIES) $(EXTRA_libblock_allocator_la_DEPENDENCIES)
$(libblock_allocator_la_LINK) $(libblock_allocator_la_OBJECTS) $(libblock_allocator_la_LIBADD) $(LIBS)
clean-noinstPROGRAMS:
@@ -394,7 +396,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-wire_test$(EXEEXT): $(wire_test_OBJECTS) $(wire_test_DEPENDENCIES)
+wire_test$(EXEEXT): $(wire_test_OBJECTS) $(wire_test_DEPENDENCIES) $(EXTRA_wire_test_DEPENDENCIES)
@rm -f wire_test$(EXEEXT)
$(wire_test_LINK) $(wire_test_OBJECTS) $(wire_test_LDADD) $(LIBS)
@@ -530,10 +532,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/bluegene/ba/block_allocator.c b/src/plugins/select/bluegene/ba/block_allocator.c
index a0af0de..41e2e19 100644
--- a/src/plugins/select/bluegene/ba/block_allocator.c
+++ b/src/plugins/select/bluegene/ba/block_allocator.c
@@ -886,46 +886,48 @@
* IN/OUT results - a list with a NULL destroyer filled in with
* midplanes and wires set to create the block with the api. If
* only interested in the hostlist NULL can be excepted also.
- * IN start - where to start the allocation.
- * IN geometry - the requested geometry of the block.
- * IN conn_type - mesh, torus, or small.
+ * IN ba_request - request for the block
+ *
+ * To be set in the ba_request
+ * start - where to start the allocation. (optional)
+ * geometry or size - the requested geometry of the block. (required)
+ * conn_type - mesh, torus, or small. (required)
*
* RET char * - hostlist of midplanes results represent must be
* xfreed. NULL on failure
*/
-extern char *set_bg_block(List results, uint16_t *start,
- uint16_t *geometry, uint16_t *conn_type)
+extern char *set_bg_block(List results, select_ba_request_t* ba_request)
{
char *name = NULL;
ba_mp_t* ba_node = NULL;
- int size = 0;
int send_results = 0;
int found = 0;
+ xassert(ba_request);
if (cluster_dims == 1) {
- if (start[X]>=DIM_SIZE[X])
+ if (ba_request->start[X]>=DIM_SIZE[X])
return NULL;
- size = geometry[X];
- ba_node = &ba_main_grid[start[X]][0][0];
+ ba_request->size = ba_request->geometry[X];
+ ba_node = &ba_main_grid[ba_request->start[X]][0][0];
} else {
- if (start[X]>=DIM_SIZE[X]
- || start[Y]>=DIM_SIZE[Y]
- || start[Z]>=DIM_SIZE[Z])
- return NULL;
+ int dim;
- if (geometry[X] <= 0 || geometry[Y] <= 0 || geometry[Z] <= 0) {
- error("problem with geometry %c%c%c, needs to be "
- "at least 111",
- alpha_num[geometry[X]],
- alpha_num[geometry[Y]],
- alpha_num[geometry[Z]]);
- return NULL;
+ ba_request->size = 1;
+ for (dim=0; dim<cluster_dims; dim++) {
+ if (ba_request->start[dim] >= DIM_SIZE[dim])
+ return NULL;
+ if ((int16_t)ba_request->geometry[dim] <= 0) {
+ error("problem with geometry of %c in dim %d, "
+ "needs to be at least 1",
+ alpha_num[ba_request->geometry[dim]],
+ dim);
+ return NULL;
+ }
+ ba_request->size *= ba_request->geometry[dim];
}
- /* info("looking at %d%d%d", geometry[X], */
- /* geometry[Y], geometry[Z]); */
- size = geometry[X] * geometry[Y] * geometry[Z];
- ba_node = &ba_main_grid[start[X]][start[Y]][start[Z]];
+
+ ba_node = coord2ba_mp(ba_request->start);
}
if (!ba_node)
@@ -939,21 +941,24 @@
/* This midplane should have already been checked if it was in
use or not */
list_append(results, ba_node);
- if (conn_type[0] >= SELECT_SMALL) {
+
+ if (ba_request->conn_type[0] >= SELECT_SMALL) {
/* adding the ba_node and ending */
ba_node->used |= BA_MP_USED_TRUE;
name = xstrdup_printf("%s", ba_node->coord_str);
goto end_it;
- }
+ } else if (ba_request->conn_type[0] == SELECT_NAV)
+ ba_request->conn_type[0] = bg_conf->default_conn_type[0];
+
found = _find_x_path(results, ba_node,
ba_node->coord,
- geometry[X],
+ ba_request->geometry[X],
1,
- conn_type[0], BLOCK_ALGO_FIRST);
+ ba_request->conn_type[0], BLOCK_ALGO_FIRST);
if (!found) {
bool is_small = 0;
- if (conn_type[0] == SELECT_SMALL)
+ if (ba_request->conn_type[0] == SELECT_SMALL)
is_small = 1;
debug2("trying less efficient code");
remove_block(results, is_small);
@@ -961,9 +966,10 @@
list_append(results, ba_node);
found = _find_x_path(results, ba_node,
ba_node->coord,
- geometry[X],
+ ba_request->geometry[X],
1,
- conn_type[0], BLOCK_ALGO_SECOND);
+ ba_request->conn_type[0],
+ BLOCK_ALGO_SECOND);
}
if (found) {
if (cluster_flags & CLUSTER_FLAG_BG) {
@@ -979,8 +985,8 @@
if (!_fill_in_coords(results,
start_list,
- geometry,
- conn_type[0])) {
+ ba_request->geometry,
+ ba_request->conn_type[0])) {
list_destroy(start_list);
goto end_it;
}
@@ -991,8 +997,8 @@
}
name = _set_internal_wires(results,
- size,
- conn_type[0]);
+ ba_request->size,
+ ba_request->conn_type[0]);
end_it:
if (!send_results && results) {
list_destroy(results);
@@ -1822,33 +1828,66 @@
}
}
-extern ba_mp_t *ba_pick_sub_block_cnodes(
- bg_record_t *bg_record, uint32_t *node_count, select_jobinfo_t *jobinfo)
+extern bool ba_sub_block_in_bitmap(select_jobinfo_t *jobinfo,
+ bitstr_t *usable_bitmap, bool step)
{
/* This shouldn't be called. */
xassert(0);
- return NULL;
+ return false;
}
-extern int ba_clear_sub_block_cnodes(
+extern int ba_sub_block_in_bitmap_clear(select_jobinfo_t *jobinfo,
+ bitstr_t *usable_bitmap)
+{
+ /* this doesn't do anything since above doesn't. */
+ return SLURM_SUCCESS;
+}
+
+extern ba_mp_t *ba_sub_block_in_record(
+ bg_record_t *bg_record, uint32_t *node_count,
+ select_jobinfo_t *jobinfo)
+{
+ /* This shouldn't be called. */
+ xassert(0);
+ return false;
+}
+
+extern int ba_sub_block_in_record_clear(
bg_record_t *bg_record, struct step_record *step_ptr)
{
/* this doesn't do anything since above doesn't. */
return SLURM_SUCCESS;
}
+extern void ba_sync_job_to_block(bg_record_t *bg_record,
+ struct job_record *job_ptr)
+{
+ xassert(bg_record);
+ xassert(job_ptr);
+
+ bg_record->job_running = job_ptr->job_id;
+ bg_record->job_ptr = job_ptr;
+}
+
+
extern bitstr_t *ba_create_ba_mp_cnode_bitmap(bg_record_t *bg_record)
{
return NULL;
}
-extern char *ba_set_ionode_str(bitstr_t *bitmap)
+extern void ba_set_ionode_str(bg_record_t *bg_record)
{
char bitstring[BITSIZE];
- if (bitmap) {
- bit_fmt(bitstring, BITSIZE, bitmap);
- return xstrdup(bitstring);
- }
+ if (!bg_record->ionode_bitmap)
+ return;
+
+ bit_fmt(bitstring, BITSIZE, bg_record->ionode_bitmap);
+ bg_record->ionode_str = xstrdup(bitstring);
+}
+
+extern struct job_record *ba_remove_job_in_block_job_list(
+ bg_record_t *bg_record, struct job_record *in_job_ptr)
+{
return NULL;
}
@@ -1868,29 +1907,15 @@
static int _find_match(select_ba_request_t *ba_request, List results)
{
int x=0;
- uint16_t start[cluster_dims];
+
ba_mp_t *ba_node = NULL;
char *name=NULL;
- int startx;
+ int startx = DIM_SIZE[X]-1;
uint16_t *geo_ptr;
if (!(cluster_flags & CLUSTER_FLAG_BG))
return 0;
- memset(start, 0, sizeof(start));
- startx = (start[X]-1);
-
- if (startx == -1)
- startx = DIM_SIZE[X]-1;
- if (ba_request->start_req) {
- for(x=0;x<cluster_dims;x++) {
- if (ba_request->start[x]>=DIM_SIZE[x])
- return 0;
- start[x] = ba_request->start[x];
- }
- }
- x=0;
-
/* set up the geo here */
if (!(geo_ptr = list_peek(ba_request->elongate_geos)))
return 0;
@@ -1919,28 +1944,20 @@
alpha_num[ba_request->geometry[Z]],
x);
new_node:
+ ba_node = coord2ba_mp(ba_request->start);
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
- info("starting at %c%c%c",
- alpha_num[start[X]],
- alpha_num[start[Y]],
- alpha_num[start[Z]]);
-
- ba_node = &ba_main_grid[start[X]][start[Y]][start[Z]];
+ info("starting at %s", ba_node->coord_str);
if (!_node_used(ba_node, ba_request->geometry[X])) {
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("trying this node %c%c%c %c%c%c %d",
- alpha_num[start[X]],
- alpha_num[start[Y]],
- alpha_num[start[Z]],
+ info("trying this node %s %c%c%c %d",
+ ba_node->coord_str,
alpha_num[ba_request->geometry[X]],
alpha_num[ba_request->geometry[Y]],
alpha_num[ba_request->geometry[Z]],
ba_request->conn_type[X]);
- name = set_bg_block(results,
- start,
- ba_request->geometry,
- ba_request->conn_type);
+ name = set_bg_block(results, ba_request);
+
if (name) {
ba_request->save_name = xstrdup(name);
xfree(name);
@@ -1961,28 +1978,28 @@
}
- if ((DIM_SIZE[Z]-start[Z]-1)
+ if ((DIM_SIZE[Z] - ba_request->start[Z]-1)
>= ba_request->geometry[Z])
- start[Z]++;
+ ba_request->start[Z]++;
else {
- start[Z] = 0;
- if ((DIM_SIZE[Y]-start[Y]-1)
+ ba_request->start[Z] = 0;
+ if ((DIM_SIZE[Y] - ba_request->start[Y]-1)
>= ba_request->geometry[Y])
- start[Y]++;
+ ba_request->start[Y]++;
else {
- start[Y] = 0;
- if ((DIM_SIZE[X]-start[X]-1)
+ ba_request->start[Y] = 0;
+ if ((DIM_SIZE[X] - ba_request->start[X]-1)
>= ba_request->geometry[X])
- start[X]++;
+ ba_request->start[X]++;
else {
if (ba_request->size == 1)
goto requested_end;
if (!_check_for_options(ba_request))
return 0;
else {
- start[X]=0;
- start[Y]=0;
- start[Z]=0;
+ memset(ba_request->start, 0,
+ sizeof(ba_request->
+ start));
goto start_again;
}
}
diff --git a/src/plugins/select/bluegene/ba_bgq/Makefile.in b/src/plugins/select/bluegene/ba_bgq/Makefile.in
index 1eb7076..65810c1 100644
--- a/src/plugins/select/bluegene/ba_bgq/Makefile.in
+++ b/src/plugins/select/bluegene/ba_bgq/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -208,6 +208,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +245,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -396,7 +398,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libblock_allocator.la: $(libblock_allocator_la_OBJECTS) $(libblock_allocator_la_DEPENDENCIES)
+libblock_allocator.la: $(libblock_allocator_la_OBJECTS) $(libblock_allocator_la_DEPENDENCIES) $(EXTRA_libblock_allocator_la_DEPENDENCIES)
$(libblock_allocator_la_LINK) $(libblock_allocator_la_OBJECTS) $(libblock_allocator_la_LIBADD) $(LIBS)
clean-noinstPROGRAMS:
@@ -407,7 +409,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-wire_test$(EXEEXT): $(wire_test_OBJECTS) $(wire_test_DEPENDENCIES)
+wire_test$(EXEEXT): $(wire_test_OBJECTS) $(wire_test_DEPENDENCIES) $(EXTRA_wire_test_DEPENDENCIES)
@rm -f wire_test$(EXEEXT)
$(wire_test_LINK) $(wire_test_OBJECTS) $(wire_test_LDADD) $(LIBS)
@@ -565,10 +567,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/bluegene/ba_bgq/block_allocator.c b/src/plugins/select/bluegene/ba_bgq/block_allocator.c
index 09bdb43..587c493 100644
--- a/src/plugins/select/bluegene/ba_bgq/block_allocator.c
+++ b/src/plugins/select/bluegene/ba_bgq/block_allocator.c
@@ -5,8 +5,9 @@
*****************************************************************************
* Copyright (C) 2004-2007 The Regents of the University of California.
* Copyright (C) 2008-2011 Lawrence Livermore National Security.
+ * Copyright (C) 2011 SchedMD LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Danny Auble <da@llnl.gov>
+ * Written by Danny Auble <da@schedmd.com>
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.schedmd.com/slurmdocs/>.
@@ -49,6 +50,7 @@
#include "src/common/uid.h"
#include "src/common/timers.h"
#include "src/common/slurmdb_defs.h"
+#include "../bg_list_functions.h"
#define DEBUG_PA
#define BEST_COUNT_INIT 20
@@ -64,27 +66,17 @@
/* _ba_system is the "current" system that the structures will work
* on */
ba_mp_t ****ba_main_grid = NULL;
+ba_geo_system_t *ba_mp_geo_system = NULL;
static ba_geo_system_t *ba_main_geo_system = NULL;
-static ba_geo_system_t *ba_mp_geo_system = NULL;
static uint16_t *deny_pass = NULL;
static ba_nc_coords_t g_nc_coords[16];
-
+static ba_mp_t **ba_main_grid_array = NULL;
/* increment Y -> Z -> A -> X -> E
* used for doing nodecard coords */
static int ba_nc_dim_order[5] = {Y, Z, A, X, E};
/** internal helper functions */
-/* */
-static int _check_for_options(select_ba_request_t* ba_request);
-
-/* */
-static int _fill_in_coords(List results, int level, ba_mp_t *start_mp,
- ba_mp_t **check_mp, int *block_start,
- int *block_end, int *coords);
-
-static int _finish_torus(List results, int level, int *block_start,
- int *block_end, uint16_t *conn_type, int *coords);
/* */
static char *_copy_from_main(List main_mps, List ret_list);
@@ -93,15 +85,12 @@
static char *_reset_altered_mps(List main_mps, bool get_name);
/* */
-static int _copy_ba_switch(ba_mp_t *ba_mp, ba_mp_t *orig_mp, int dim);
-
-/* */
static int _check_deny_pass(int dim);
/* */
-static int _find_path(List mps, ba_mp_t *start_mp, int dim,
- uint16_t geometry, uint16_t conn_type,
- int *block_start, int *block_end);
+static int _fill_in_wires(List mps, ba_mp_t *start_mp, int dim,
+ uint16_t geometry, uint16_t conn_type,
+ bool full_check);
/* */
static void _setup_next_mps(int level, uint16_t *coords);
@@ -115,6 +104,21 @@
/** */
static bool _mp_out_used(ba_mp_t* ba_mp, int dim);
+/** */
+static uint16_t _find_distance(uint16_t start, uint16_t end, int dim);
+
+static int _ba_set_ionode_str_internal(int level, int *coords,
+ int *start_offset, int *end_offset,
+ hostlist_t hl);
+
+static bitstr_t *_find_sub_block(ba_geo_table_t **geo_table,
+ uint16_t *start_loc, bitstr_t *total_bitmap,
+ uint32_t node_count);
+
+static ba_geo_table_t *_find_geo_table(uint32_t orig_node_count,
+ uint32_t *node_count,
+ uint32_t total_count);
+
extern void ba_create_system()
{
int a,x,y,z, i = 0, dim;
@@ -124,44 +128,7 @@
if (ba_main_grid)
ba_destroy_system();
- ba_main_grid = (ba_mp_t****)
- xmalloc(sizeof(ba_mp_t***) * DIM_SIZE[A]);
- for (a = 0; a < DIM_SIZE[A]; a++) {
- ba_main_grid[a] = (ba_mp_t***)
- xmalloc(sizeof(ba_mp_t**) * DIM_SIZE[X]);
- for (x = 0; x < DIM_SIZE[X]; x++) {
- ba_main_grid[a][x] = (ba_mp_t**)
- xmalloc(sizeof(ba_mp_t*) * DIM_SIZE[Y]);
- for (y = 0; y < DIM_SIZE[Y]; y++) {
- ba_main_grid[a][x][y] = (ba_mp_t*)
- xmalloc(sizeof(ba_mp_t) * DIM_SIZE[Z]);
- for (z = 0; z < DIM_SIZE[Z]; z++) {
- ba_mp_t *ba_mp = &ba_main_grid
- [a][x][y][z];
- ba_mp->coord[A] = a;
- ba_mp->coord[X] = x;
- ba_mp->coord[Y] = y;
- ba_mp->coord[Z] = z;
-
- snprintf(ba_mp->coord_str,
- sizeof(ba_mp->coord_str),
- "%c%c%c%c",
- alpha_num[ba_mp->coord[A]],
- alpha_num[ba_mp->coord[X]],
- alpha_num[ba_mp->coord[Y]],
- alpha_num[ba_mp->coord[Z]]);
- ba_setup_mp(ba_mp, true, false);
- ba_mp->state = NODE_STATE_IDLE;
- /* This might get changed
- later, but just incase set
- it up here.
- */
- ba_mp->index = i++;
- }
- }
- }
- }
-
+ slurm_mutex_lock(&ba_system_mutex);
/* build all the possible geos for the mid planes */
ba_main_geo_system = xmalloc(sizeof(ba_geo_system_t));
ba_main_geo_system->dim_count = SYSTEM_DIMENSIONS;
@@ -171,7 +138,7 @@
for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++)
ba_main_geo_system->dim_size[dim] = DIM_SIZE[dim];
- ba_create_geo_table(ba_main_geo_system);
+ ba_create_geo_table(ba_main_geo_system, 0);
//ba_print_geo_table(ba_main_geo_system);
/* build all the possible geos for a sub block inside a mid plane */
@@ -185,11 +152,25 @@
ba_mp_geo_system->dim_size[2] = 4;
ba_mp_geo_system->dim_size[3] = 4;
ba_mp_geo_system->dim_size[4] = 2;
- ba_create_geo_table(ba_mp_geo_system);
+	/* FIXME: We need to not create any geo with a dimension of 3 in it.
+ * There apparently is a limitation in BGQ where you can't
+ * make a sub-block with a dimension of 3. If this ever goes
+	 * away just remove the extra parameter to the
+ * ba_create_geo_table.
+ *
+ * FROM IBM:
+ * We have recently encountered a problematic scenario with
+ * sub-block jobs and how the system (used for I/O) and user
+ * (used for MPI) torus class routes are configured. The
+ * network device hardware has cutoff registers to prevent
+ * packets from flowing outside of the
+ * sub-block. Unfortunately, when the sub-block has a size 3,
+ * the job can attempt to send user packets outside of its
+ * sub-block. This causes it to be terminated by signal 36.
+ */
+ ba_create_geo_table(ba_mp_geo_system, 1);
//ba_print_geo_table(ba_mp_geo_system);
- _setup_next_mps(A, coords);
-
/* Now set it up to mark the corners of each nodecard. This
is used if running a sub-block job on a small block later.
*/
@@ -244,18 +225,80 @@
/* alpha_num[g_nc_coords[i].end[E]]); */
_increment_nc_coords(0, mp_coords, ba_mp_geo_system->dim_size);
}
+
+ /* Set up a flat array to be used in conjunction with the
+ ba_geo system.
+ */
+ ba_main_grid_array = xmalloc(sizeof(ba_mp_t *) *
+ ba_main_geo_system->total_size);
+ i = 0;
+ ba_main_grid = (ba_mp_t****)
+ xmalloc(sizeof(ba_mp_t***) * DIM_SIZE[A]);
+ for (a = 0; a < DIM_SIZE[A]; a++) {
+ ba_main_grid[a] = (ba_mp_t***)
+ xmalloc(sizeof(ba_mp_t**) * DIM_SIZE[X]);
+ for (x = 0; x < DIM_SIZE[X]; x++) {
+ ba_main_grid[a][x] = (ba_mp_t**)
+ xmalloc(sizeof(ba_mp_t*) * DIM_SIZE[Y]);
+ for (y = 0; y < DIM_SIZE[Y]; y++) {
+ ba_main_grid[a][x][y] = (ba_mp_t*)
+ xmalloc(sizeof(ba_mp_t) * DIM_SIZE[Z]);
+ for (z = 0; z < DIM_SIZE[Z]; z++) {
+ ba_mp_t *ba_mp = &ba_main_grid
+ [a][x][y][z];
+ ba_mp->coord[A] = a;
+ ba_mp->coord[X] = x;
+ ba_mp->coord[Y] = y;
+ ba_mp->coord[Z] = z;
+
+ snprintf(ba_mp->coord_str,
+ sizeof(ba_mp->coord_str),
+ "%c%c%c%c",
+ alpha_num[ba_mp->coord[A]],
+ alpha_num[ba_mp->coord[X]],
+ alpha_num[ba_mp->coord[Y]],
+ alpha_num[ba_mp->coord[Z]]);
+ ba_setup_mp(ba_mp, true, false);
+ ba_mp->state = NODE_STATE_IDLE;
+ /* This might get changed
+ later, but just incase set
+ it up here.
+ */
+ ba_mp->index = i++;
+ ba_mp->ba_geo_index =
+ ba_node_xlate_to_1d(
+ ba_mp->coord,
+ ba_main_geo_system);
+ ba_main_grid_array[ba_mp->ba_geo_index]
+ = ba_mp;
+ }
+ }
+ }
+ }
+
+ _setup_next_mps(A, coords);
+ slurm_mutex_unlock(&ba_system_mutex);
}
/** */
extern void ba_destroy_system(void)
{
- int a, x, y;
+ int a, x, y, z;
+
+ slurm_mutex_lock(&ba_system_mutex);
+ xfree(ba_main_grid_array);
if (ba_main_grid) {
for (a=0; a<DIM_SIZE[A]; a++) {
for (x = 0; x < DIM_SIZE[X]; x++) {
- for (y = 0; y < DIM_SIZE[Y]; y++)
+ for (y = 0; y < DIM_SIZE[Y]; y++) {
+ for (z=0; z < DIM_SIZE[Z]; z++) {
+ free_internal_ba_mp(
+ &ba_main_grid
+ [a][x][y][z]);
+ }
xfree(ba_main_grid[a][x][y]);
+ }
xfree(ba_main_grid[a][x]);
}
xfree(ba_main_grid[a]);
@@ -277,6 +320,7 @@
}
memset(DIM_SIZE, 0, sizeof(DIM_SIZE));
+ slurm_mutex_unlock(&ba_system_mutex);
}
/*
@@ -381,6 +425,7 @@
debug(" elongate:\t%d", ba_request->elongate);
}
+/* ba_system_mutex needs to be locked before calling this. */
extern ba_mp_t *coord2ba_mp(const uint16_t *coord)
{
if ((coord[A] >= DIM_SIZE[A]) || (coord[X] >= DIM_SIZE[X]) ||
@@ -405,12 +450,6 @@
*/
extern int allocate_block(select_ba_request_t* ba_request, List results)
{
- uint16_t start[cluster_dims];
- char *name=NULL;
- int i, dim, startx;
- ba_geo_table_t *ba_geo_table;
- bool found = false;
-
if (!ba_initialized){
error("Error, configuration not initialized, "
"calling ba_init(NULL, 1)");
@@ -425,114 +464,14 @@
if (!(cluster_flags & CLUSTER_FLAG_BG))
return 0;
- memset(start, 0, sizeof(start));
- startx = (start[X]-1);
+ if ((ba_request->save_name = set_bg_block(results, ba_request)))
+ return 1;
- if (startx == -1)
- startx = DIM_SIZE[X]-1;
- if (ba_request->start_req) {
- for (dim = 0; dim < cluster_dims; dim++) {
- if (ba_request->start[dim] >= DIM_SIZE[dim])
- return 0;
- start[dim] = ba_request->start[dim];
- }
- }
-
- /* set up the geo_table */
- if (ba_request->geometry[0] == (uint16_t)NO_VAL) {
- if (!(ba_request->geo_table =
- ba_main_geo_system->geo_table_ptr[ba_request->size])) {
- error("allocate_block: "
- "No geometries for %d midplanes",
- ba_request->size);
- return 0;
- }
- ba_geo_table = (ba_geo_table_t *)ba_request->geo_table;
- if (!ba_geo_table || !ba_geo_table->geometry) {
- error("allocate_block: no geo table");
- return 0;
- }
-
- memcpy(ba_request->geometry, ba_geo_table->geometry,
- sizeof(ba_request->geometry));
- } else
- ba_request->geo_table = NULL;
-
-start_again:
- i = 0;
- if (i == startx)
- i = startx-1;
- while (i != startx) {
- i++;
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
- info("allocate_block: finding %c%c%c%c try %d",
- alpha_num[ba_request->geometry[A]],
- alpha_num[ba_request->geometry[X]],
- alpha_num[ba_request->geometry[Y]],
- alpha_num[ba_request->geometry[Z]],
- i);
- new_mp:
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
- info("allocate_block: starting at %c%c%c%c",
- alpha_num[start[A]],
- alpha_num[start[X]],
- alpha_num[start[Y]],
- alpha_num[start[Z]]);
-
- if ((name = set_bg_block(results, start,
- ba_request->geometry,
- ba_request->conn_type))) {
- ba_request->save_name = name;
- name = NULL;
- return 1;
- }
-
- /* If there was an error set_bg_block resets the
- results list */
- /* if (results && list_count(results)) { */
- /* bool is_small = 0; */
- /* if (ba_request->conn_type[0] == SELECT_SMALL) */
- /* is_small = 1; */
- /* remove_block(results, is_small); */
- /* list_flush(results); */
- /* } */
-
- if (ba_request->start_req) {
- info("start asked for ");
- goto requested_end;
- }
- //exit(0);
- debug2("allocate_block: trying something else");
-
- found = false;
- for (dim = cluster_dims-1; dim >= 0; dim--) {
- start[dim]++;
- if (start[dim] < DIM_SIZE[dim]) {
- found = true;
- break;
- }
- start[dim] = 0;
- }
- if (!found) {
- if (ba_request->size == 1)
- goto requested_end;
- if (!_check_for_options(ba_request))
- return 0;
- else {
- memset(start, 0, sizeof(start));
- goto start_again;
- }
- }
- goto new_mp;
- }
-
-requested_end:
debug2("allocate_block: can't allocate");
return 0;
}
-
/*
* Admin wants to remove a previous allocation.
* will allow Admin to delete a previous allocation retrival by letter code.
@@ -544,30 +483,35 @@
ba_mp_t* ba_mp = NULL;
ListIterator itr;
+ slurm_mutex_lock(&ba_system_mutex);
itr = list_iterator_create(mps);
while ((curr_ba_mp = (ba_mp_t*) list_next(itr))) {
/* since the list that comes in might not be pointers
to the main list we need to point to that main list */
- ba_mp = &ba_main_grid
- [curr_ba_mp->coord[A]]
- [curr_ba_mp->coord[X]]
- [curr_ba_mp->coord[Y]]
- [curr_ba_mp->coord[Z]];
+ ba_mp = coord2ba_mp(curr_ba_mp->coord);
if (curr_ba_mp->used) {
ba_mp->used &= (~BA_MP_USED_TRUE);
if (ba_mp->used == BA_MP_USED_FALSE)
- bit_clear(ba_main_mp_bitmap, ba_mp->index);
+ bit_clear(ba_main_mp_bitmap,
+ ba_mp->ba_geo_index);
}
ba_mp->used &= (~BA_MP_USED_ALTERED_PASS);
/* Small blocks don't use wires, and only have 1 mp,
so just break. */
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
- info("remove_block: %s state now %d",
+ info("remove_block: midplane %s used state now %d",
ba_mp->coord_str, ba_mp->used);
for (dim=0; dim<cluster_dims; dim++) {
+ /* House the altered usage here without any
+ error so we don't take it from the original.
+ */
+ uint16_t altered_usage;
+
if (curr_ba_mp == ba_mp) {
+ altered_usage = ba_mp->alter_switch[dim].usage
+ & (~BG_SWITCH_CABLE_ERROR_FULL);
/* Remove the usage that was altered */
/* info("remove_block: %s(%d) %s removing %s", */
/* ba_mp->coord_str, dim, */
@@ -576,14 +520,22 @@
/* ba_switch_usage_str( */
/* ba_mp->alter_switch[dim].usage)); */
ba_mp->axis_switch[dim].usage &=
- (~ba_mp->alter_switch[dim].usage);
+ (~altered_usage);
/* info("remove_block: %s(%d) is now at %s", */
/* ba_mp->coord_str, dim, */
/* ba_switch_usage_str( */
/* ba_mp->axis_switch[dim].usage)); */
- } else if (curr_ba_mp->axis_switch[dim].usage
- != BG_SWITCH_NONE) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
+ continue;
+ }
+
+ /* Set this after we know curr_ba_mp isn't
+ the same as ba_mp so we don't mess up the
+ original.
+ */
+ altered_usage = curr_ba_mp->axis_switch[dim].usage
+ & (~BG_SWITCH_CABLE_ERROR_FULL);
+ if (altered_usage != BG_SWITCH_NONE) {
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
info("remove_block: 2 %s(%d) %s %s "
"removing %s",
ba_mp->coord_str, dim,
@@ -592,12 +544,11 @@
ba_mp->axis_switch
[dim].usage),
ba_switch_usage_str(
- curr_ba_mp->axis_switch
- [dim].usage));
+ altered_usage));
/* Just remove the usage set here */
ba_mp->axis_switch[dim].usage &=
- (~curr_ba_mp->axis_switch[dim].usage);
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
+ (~altered_usage);
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
info("remove_block: 2 %s(%d) is "
"now at %s",
ba_mp->coord_str, dim,
@@ -609,6 +560,7 @@
}
}
list_iterator_destroy(itr);
+ slurm_mutex_unlock(&ba_system_mutex);
return 1;
}
@@ -637,18 +589,11 @@
if (!mps)
return rc;
+ slurm_mutex_lock(&ba_system_mutex);
itr = list_iterator_create(mps);
while ((ba_mp = list_next(itr))) {
- /* info("checking %c%c%c", */
-/* ba_mp->coord[X], */
-/* ba_mp->coord[Y], */
-/* ba_mp->coord[Z]); */
-
- curr_ba_mp = &ba_main_grid
- [ba_mp->coord[A]]
- [ba_mp->coord[X]]
- [ba_mp->coord[Y]]
- [ba_mp->coord[Z]];
+ /* info("checking %s", ba_mp->coord_str); */
+ curr_ba_mp = coord2ba_mp(ba_mp->coord);
if (ba_mp->used && curr_ba_mp->used) {
/* Only error if the midplane isn't already
@@ -675,8 +620,9 @@
if (ba_mp->used) {
curr_ba_mp->used = ba_mp->used;
- xassert(!bit_test(ba_main_mp_bitmap, ba_mp->index));
- bit_set(ba_main_mp_bitmap, ba_mp->index);
+ xassert(!bit_test(ba_main_mp_bitmap,
+ ba_mp->ba_geo_index));
+ bit_set(ba_main_mp_bitmap, ba_mp->ba_geo_index);
}
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
@@ -691,6 +637,17 @@
if (ba_switch->usage == BG_SWITCH_NONE)
continue;
+ else if (ba_switch->usage
+ & BG_SWITCH_CABLE_ERROR_FULL) {
+ error("check_and_set_mp_list: Somehow we got "
+ "a switch with an error set in it. "
+ "This should never happen except "
+ "on a system with missing cables such "
+ "as a half rack system. %u",
+ ba_switch->usage);
+ continue;
+ }
+
if (ba_switch->usage & curr_ba_switch->usage) {
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
@@ -706,6 +663,24 @@
rc = SLURM_ERROR;
goto end_it;
}
+ /* Since we are only checking to see if this
+ block is creatable we don't need to check
+ hardware issues like bad cables.
+ */
+ /* else if ((curr_ba_switch->usage */
+ /* & BG_SWITCH_CABLE_ERROR_SET) */
+ /* && (ba_switch->usage & BG_SWITCH_OUT_PASS)) { */
+ /* if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) */
+ /* info("check_and_set_mp_list: " */
+ /* "%s(%d)'s cable is not available " */
+ /* "can't really make this block. " */
+ /* "We need %s and system is %s", */
+ /* ba_mp->coord_str, i, */
+ /* ba_switch_usage_str( */
+ /* ba_switch->usage), */
+ /* ba_switch_usage_str( */
+ /* curr_ba_switch->usage)); */
+ /* } */
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
info("check_and_set_mp_list: "
@@ -720,6 +695,7 @@
rc = SLURM_SUCCESS;
end_it:
list_iterator_destroy(itr);
+ slurm_mutex_unlock(&ba_system_mutex);
return rc;
}
@@ -730,132 +706,208 @@
* IN/OUT results - a list with a NULL destroyer filled in with
* midplanes and wires set to create the block with the api. If
* only interested in the hostlist NULL can be excepted also.
- * IN start - where to start the allocation.
- * IN geometry - the requested geometry of the block.
- * IN conn_type - mesh, torus, or small.
+ * IN ba_request - request for the block
+ *
+ * To be set in the ba_request
+ * start - where to start the allocation. (optional)
+ * geometry or size - the requested geometry of the block. (required)
+ * conn_type - mesh, torus, or small. (required)
*
* RET char * - hostlist of midplanes results represent must be
* xfreed. NULL on failure
*/
-extern char *set_bg_block(List results, uint16_t *start,
- uint16_t *geometry, uint16_t *conn_type)
+extern char *set_bg_block(List results, select_ba_request_t* ba_request)
{
List main_mps = NULL;
char *name = NULL;
ba_mp_t* ba_mp = NULL;
- ba_mp_t *check_mp[cluster_dims];
- int size = 1, dim;
- int block_start[cluster_dims];
- int block_end[cluster_dims];
- int coords[cluster_dims];
+ int dim;
uint16_t local_deny_pass = ba_deny_pass;
+ ba_geo_table_t *ba_geo_table = NULL;
+ bitstr_t *success_bitmap = NULL;
+ uint16_t orig_conn_type[HIGHEST_DIMENSIONS];
- if (!ba_initialized){
- error("Error, configuration not initialized, "
- "calling ba_init(NULL, 1)");
- ba_init(NULL, 1);
- }
+ xassert(ba_initialized);
- if (cluster_dims == 1) {
- if (start[A] >= DIM_SIZE[A])
+ if (!ba_request->size) {
+ if (ba_request->geometry[0] == (uint16_t)NO_VAL) {
+ error("set_bg_block: No size or geometry given.");
return NULL;
- size = geometry[X];
- ba_mp = &ba_main_grid[start[A]][0][0][0];
- } else {
- for (dim=0; dim<cluster_dims; dim++) {
- if (start[dim] >= DIM_SIZE[dim])
- return NULL;
- if (geometry[dim] <= 0) {
- error("problem with geometry of %c in dim %d, "
- "needs to be at least 1",
- alpha_num[geometry[dim]], dim);
- return NULL;
- }
- size *= geometry[dim];
}
- ba_mp = &ba_main_grid[start[A]][start[X]][start[Y]][start[Z]];
- /* info("looking at %s", ba_mp->coord_str); */
+ ba_request->size = 1;
+ for (dim=0; dim<cluster_dims; dim++)
+ ba_request->size *= ba_request->geometry[dim];
}
+ /* set up the geo_table */
- if (!ba_mp)
- goto end_it;
-
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("trying mp %s %c%c%c%c %d",
- ba_mp->coord_str,
- alpha_num[geometry[A]],
- alpha_num[geometry[X]],
- alpha_num[geometry[Y]],
- alpha_num[geometry[Z]],
- conn_type[A]);
-
- /* check just the first dim to see if this node is used for
- anything just yet. */
- if (_mp_used(ba_mp, 0))
- goto end_it;
-
- if (conn_type[A] >= SELECT_SMALL) {
- /* adding the ba_mp and end, we could go through the
- * regular logic here, but this is just faster. */
- if (results) {
- ba_mp = ba_copy_mp(ba_mp);
- /* We need to have this node wrapped in Q to handle
- wires correctly when creating around the midplane.
- */
- ba_setup_mp(ba_mp, false, true);
- ba_mp->used = BA_MP_USED_TRUE;
- list_append(results, ba_mp);
- }
- name = xstrdup(ba_mp->coord_str);
- goto end_it;
+ xassert(ba_request->size);
+ if (!(ba_geo_table =
+ ba_main_geo_system->geo_table_ptr[ba_request->size])) {
+ error("set_bg_block: No geometries for %d midplanes",
+ ba_request->size);
+ return NULL;
}
-
- main_mps = list_create(NULL);
-
- ba_mp->used |= BA_MP_USED_ALTERED;
- list_append(main_mps, ba_mp);
-
if (!deny_pass)
deny_pass = &local_deny_pass;
- /* set the end to the start and the _find_path will increase each dim.*/
- for (dim=0; dim<cluster_dims; dim++) {
- block_start[dim] = start[dim];
- block_end[dim] = start[dim];
- if (!_find_path(main_mps, ba_mp, dim, geometry[dim],
- conn_type[dim], &block_start[dim],
- &block_end[dim])) {
- goto end_it;
+ memcpy(orig_conn_type, ba_request->conn_type,
+ sizeof(ba_request->conn_type));
+
+ slurm_mutex_lock(&ba_system_mutex);
+ while (ba_geo_table) {
+ ListIterator itr;
+ int scan_offset = 0, cnt = 0, i=0;
+ uint16_t start_loc[ba_main_geo_system->dim_count];
+
+ if (ba_request->geometry[0] != (uint16_t)NO_VAL) {
+ /* if we are requesting a specific geo, go directly to
+ that geo_table. */
+ if (memcmp(ba_request->geometry, ba_geo_table->geometry,
+ sizeof(uint16_t) * cluster_dims)) {
+ ba_geo_table = ba_geo_table->next_ptr;
+ continue;
+ }
}
+
+ try_again:
+ if (success_bitmap)
+ FREE_NULL_BITMAP(success_bitmap);
+ if (main_mps && list_count(main_mps)) {
+ _reset_altered_mps(main_mps, 0);
+ list_flush(main_mps);
+ }
+
+ if (ba_geo_test_all(ba_main_mp_bitmap,
+ &success_bitmap,
+ ba_geo_table, &cnt,
+ ba_main_geo_system, deny_pass,
+ start_loc, &scan_offset, false)
+ != SLURM_SUCCESS) {
+ if (ba_request->geometry[0] != (uint16_t)NO_VAL) {
+ ba_geo_table = NULL;
+ break;
+ }
+
+ ba_geo_table = ba_geo_table->next_ptr;
+ continue;
+ }
+
+ if (ba_request->start_req) {
+ /* if we are requesting a specific start make
+ sure that is what is returned. Else try
+			   again. Since this only happens with smap
+			   or at startup, handling it this way
+			   shouldn't be that big of a deal. */
+ if (memcmp(ba_request->start, start_loc,
+ sizeof(uint16_t) * cluster_dims))
+ goto try_again;
+ }
+
+ main_mps = list_create(NULL);
+ for (i=0; i<ba_main_geo_system->total_size; i++) {
+ if (!bit_test(success_bitmap, i))
+ continue;
+ ba_mp = ba_main_grid_array[i];
+ xassert(ba_mp);
+
+ for (dim=0; dim<cluster_dims; dim++) {
+ if (_mp_used(ba_mp, dim))
+ goto try_again;
+
+ if (ba_geo_table->geometry[dim] == 1) {
+ /* Always check MESH here since we
+ * only care about the IN/OUT ports.
+ * all 1 dimensions need a TORUS */
+ ba_mp->alter_switch[dim].usage
+ |= BG_SWITCH_WRAPPED;
+ if (ba_debug_flags
+ & DEBUG_FLAG_BG_ALGO_DEEP)
+ info("set_bg_block: "
+ "using mp %s(%d) "
+ "in 1 geo %s added %s",
+ ba_mp->coord_str, dim,
+ ba_switch_usage_str(
+ ba_mp->
+ axis_switch[dim].
+ usage),
+ ba_switch_usage_str(
+ ba_mp->
+ alter_switch[dim].
+ usage));
+ continue;
+ }
+ }
+ ba_mp->used = BA_MP_USED_ALTERED;
+ list_append(main_mps, ba_mp);
+ }
+ /* If we are going to take up the entire dimension
+ might as well force it to be TORUS. Check against
+ MESH here instead of !TORUS so we don't mess up
+ small block allocations.
+ */
+ for (dim=0; dim<cluster_dims; dim++) {
+ if (((ba_request->conn_type[dim] == SELECT_MESH)
+ || (ba_request->conn_type[dim] == SELECT_NAV))
+ && ((ba_geo_table->geometry[dim] == 1)
+ || (ba_geo_table->geometry[dim]
+ == DIM_SIZE[dim]))) {
+ /* On a Q all single midplane blocks
+ * must be a TORUS.
+ *
+ * Also if we are using all midplanes
+ * in a dimension might as well make
+ * it a torus.
+ */
+ ba_request->conn_type[dim] = SELECT_TORUS;
+ } else if (ba_request->conn_type[dim] == SELECT_NAV) {
+ /* Set everything else to the default */
+ ba_request->conn_type[dim] =
+ bg_conf->default_conn_type[dim];
+ }
+ }
+
+ itr = list_iterator_create(main_mps);
+ while ((ba_mp = list_next(itr))) {
+ if (ba_mp->used & BA_MP_USED_PASS_BIT)
+ continue;
+ for (dim=0; dim<cluster_dims; dim++) {
+ if ((ba_geo_table->geometry[dim] == 1)
+ || (ba_mp->coord[dim] != start_loc[dim]))
+ continue;
+ if (!_fill_in_wires(
+ main_mps, ba_mp, dim,
+ ba_geo_table->geometry[dim],
+ ba_request->conn_type[dim],
+ ba_request->full_check)) {
+ list_iterator_destroy(itr);
+ memcpy(ba_request->conn_type,
+ orig_conn_type,
+ sizeof(ba_request->conn_type));
+ goto try_again;
+ }
+ }
+ }
+ list_iterator_destroy(itr);
+
+ /* fill in the start with the actual start of the
+ * block since it isn't always easy to figure out and
+ * is easily */
+ memcpy(ba_request->start, start_loc, sizeof(ba_request->start));
+
+ break;
}
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
- info("complete box is %c%c%c%c x %c%c%c%c",
- alpha_num[block_start[A]],
- alpha_num[block_start[X]],
- alpha_num[block_start[Y]],
- alpha_num[block_start[Z]],
- alpha_num[block_end[A]],
- alpha_num[block_end[X]],
- alpha_num[block_end[Y]],
- alpha_num[block_end[Z]]);
+ if (success_bitmap)
+ FREE_NULL_BITMAP(success_bitmap);
- if (_fill_in_coords(main_mps, A, ba_mp, check_mp,
- block_start, block_end, coords) == -1)
- goto end_it;
-
- if (_finish_torus(main_mps, A, block_start,
- block_end, conn_type, coords) == -1)
- goto end_it;
-
- /* Success */
- if (results)
- name = _copy_from_main(main_mps, results);
- else
- name = _reset_altered_mps(main_mps, 1);
-
-end_it:
+ if (ba_geo_table) {
+ /* Success */
+ if (results)
+ name = _copy_from_main(main_mps, results);
+ else
+ name = _reset_altered_mps(main_mps, 1);
+ }
if (main_mps) {
/* handle failure */
@@ -864,6 +916,7 @@
list_destroy(main_mps);
main_mps = NULL;
}
+ slurm_mutex_unlock(&ba_system_mutex);
if (name)
debug2("name = %s", name);
@@ -924,7 +977,142 @@
}
-extern ba_mp_t *ba_pick_sub_block_cnodes(
+extern bool ba_sub_block_in_bitmap(select_jobinfo_t *jobinfo,
+ bitstr_t *usable_bitmap, bool step)
+{
+ bitstr_t *found_bits = NULL;
+ uint32_t node_count;
+ ba_geo_table_t *geo_table = NULL;
+ int clr_cnt, dim;
+ uint16_t start_loc[ba_mp_geo_system->dim_count];
+
+ xassert(jobinfo);
+ xassert(usable_bitmap);
+
+ node_count = jobinfo->cnode_cnt;
+ clr_cnt = bit_clear_count(usable_bitmap);
+
+ if (clr_cnt < node_count)
+ return false;
+
+ jobinfo->dim_cnt = ba_mp_geo_system->dim_count;
+
+try_again:
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO) {
+ bit_not(usable_bitmap);
+ char *tmp_char = ba_node_map_ranged_hostlist(
+ usable_bitmap, ba_mp_geo_system);
+ bit_not(usable_bitmap);
+ info("ba_sub_block_in_bitmap: "
+ "looking for %u in a field of %u (%s).",
+ node_count, clr_cnt, tmp_char);
+ xfree(tmp_char);
+ }
+
+ if (!(geo_table = _find_geo_table(node_count, &node_count, clr_cnt)))
+ return false;
+
+ if (!(found_bits = _find_sub_block(
+ &geo_table, start_loc, usable_bitmap, node_count))) {
+		/* This is to verify we have a good geo on this request. So if a
+		   person asks for 12 and the only reason they can't get it is
+		   because they can't get that geo, and if they would have asked
+		   for 16 then they could run, we do that for them.
+ */
+ node_count++;
+ if (clr_cnt > node_count) {
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
+ info("trying with a larger size");
+ goto try_again;
+ }
+ return false;
+ }
+
+ if (jobinfo->units_avail)
+ FREE_NULL_BITMAP(jobinfo->units_avail);
+ if (jobinfo->units_used)
+ FREE_NULL_BITMAP(jobinfo->units_used);
+
+ jobinfo->units_avail = found_bits;
+ found_bits = NULL;
+ jobinfo->units_used = bit_copy(jobinfo->units_avail);
+ /* ba_sub_block_in_bitmap works for both job and step
+ allocations. It sets the units_used to the
+ opposite of units_available by default. If used for a step
+ we want all units used to be that of the avail for easy
+ clearing.
+ */
+ if (!step)
+ bit_not(jobinfo->units_used);
+ xfree(jobinfo->ionode_str);
+
+ jobinfo->cnode_cnt = node_count;
+
+ for (dim = 0; dim < jobinfo->dim_cnt; dim++) {
+ jobinfo->geometry[dim] = geo_table->geometry[dim];
+ jobinfo->start_loc[dim] = start_loc[dim];
+ }
+
+ if (node_count < bg_conf->mp_cnode_cnt) {
+ jobinfo->ionode_str = ba_node_map_ranged_hostlist(
+ jobinfo->units_avail, ba_mp_geo_system);
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO) {
+ char *tmp_char;
+ bitstr_t *total_bitmap = bit_copy(usable_bitmap);
+ bit_or(total_bitmap, jobinfo->units_avail);
+ bit_not(total_bitmap);
+ tmp_char = ba_node_map_ranged_hostlist(
+ total_bitmap, ba_mp_geo_system);
+ FREE_NULL_BITMAP(total_bitmap);
+ info("ba_sub_block_in_bitmap: "
+ "can use cnodes %s leaving '%s' usable.",
+ jobinfo->ionode_str, tmp_char);
+ xfree(tmp_char);
+ }
+ } else if (ba_debug_flags & DEBUG_FLAG_BG_ALGO) {
+ info("ba_sub_block_in_bitmap: "
+ "can use all cnodes leaving none usable.");
+ }
+
+ return true;
+}
+
+extern int ba_sub_block_in_bitmap_clear(
+ select_jobinfo_t *jobinfo, bitstr_t *usable_bitmap)
+{
+ char *tmp_char = NULL, *tmp_char2 = NULL;
+
+ if (!jobinfo->units_avail) {
+ error("ba_sub_block_in_bitmap_clear: "
+ "no units avail bitmap on the jobinfo");
+ return SLURM_ERROR;
+ }
+
+ /* use units_avail here instead of units_used so it works for
+ both jobs and steps with no other code.
+ */
+ bit_not(jobinfo->units_avail);
+ bit_and(usable_bitmap, jobinfo->units_avail);
+ bit_not(jobinfo->units_avail);
+
+ if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_ALGO) {
+ tmp_char = ba_node_map_ranged_hostlist(
+ jobinfo->units_avail, ba_mp_geo_system);
+ bit_not(usable_bitmap);
+ tmp_char2 = ba_node_map_ranged_hostlist(
+ usable_bitmap, ba_mp_geo_system);
+ bit_not(usable_bitmap);
+ info("ba_sub_block_in_bitmap_clear: "
+ "cleared cnodes %s making '%s' available.",
+ tmp_char, tmp_char2);
+ xfree(tmp_char);
+ xfree(tmp_char2);
+ }
+
+ return SLURM_SUCCESS;
+}
+
+extern ba_mp_t *ba_sub_block_in_record(
bg_record_t *bg_record, uint32_t *node_count, select_jobinfo_t *jobinfo)
{
ListIterator itr = NULL;
@@ -934,6 +1122,8 @@
uint32_t orig_node_count = *node_count;
int dim;
uint32_t max_clear_cnt = 0, clear_cnt;
+ bitstr_t *total_bitmap = NULL;
+ uint16_t start_loc[ba_mp_geo_system->dim_count];
xassert(ba_mp_geo_system);
xassert(bg_record->ba_mp_list);
@@ -943,37 +1133,12 @@
jobinfo->dim_cnt = ba_mp_geo_system->dim_count;
try_again:
- while (!(geo_table = ba_mp_geo_system->geo_table_ptr[*node_count])) {
- debug2("ba_pick_sub_block_cnodes: No geometries of size %u ",
- *node_count);
- (*node_count)++;
- if (*node_count > bg_record->cnode_cnt)
- break;
- }
- if (*node_count > bg_record->cnode_cnt) {
- debug("ba_pick_sub_block_cnodes: requested sub-block larger "
- "than block");
+ if (!(geo_table = _find_geo_table(
+ orig_node_count, node_count, bg_record->cnode_cnt)))
return NULL;
- }
-
- if (orig_node_count != *node_count)
- debug("ba_pick_sub_block_cnodes: user requested %u nodes, "
- "but that can't make a block, giving them %d",
- orig_node_count, *node_count);
-
- if (!geo_table) {
- /* This should never happen */
- error("ba_pick_sub_block_cnodes: "
- "Couldn't place this job size %u tried up to "
- "the full size of the block (%u)",
- orig_node_count, bg_record->cnode_cnt);
- return NULL;
- }
itr = list_iterator_create(bg_record->ba_mp_list);
while ((ba_mp = list_next(itr))) {
- int cnt = 0;
-
if (!ba_mp->used)
continue;
@@ -984,93 +1149,27 @@
if (!ba_mp->cnode_bitmap)
ba_mp->cnode_bitmap =
ba_create_ba_mp_cnode_bitmap(bg_record);
- clear_cnt = bit_clear_count(ba_mp->cnode_bitmap);
- if (clear_cnt < *node_count) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("ba_pick_sub_block_cnodes: "
- "only have %d avail in %s need %d",
- clear_cnt,
- ba_mp->coord_str, *node_count);
- continue;
- }
+ if (!ba_mp->cnode_err_bitmap)
+ ba_mp->cnode_err_bitmap =
+ bit_alloc(bg_conf->mp_cnode_cnt);
+ total_bitmap = bit_copy(ba_mp->cnode_bitmap);
+ bit_or(total_bitmap, ba_mp->cnode_err_bitmap);
- while (geo_table) {
- int scan_offset = 0;
- uint16_t start_loc[ba_mp_geo_system->dim_count];
-
- /* FIXME: In the current IBM API it doesn't
- allow wrapping inside the midplane. In the
- future this will change. When that happens
- there will need to be a flag that is sent
- here instead of always true.
- */
- if (ba_geo_test_all(ba_mp->cnode_bitmap,
- &jobinfo->units_used,
- geo_table, &cnt,
- ba_mp_geo_system, NULL,
- start_loc, &scan_offset, true)
- != SLURM_SUCCESS) {
- geo_table = geo_table->next_ptr;
- continue;
- }
-
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) {
- info("scan_offset=%d", scan_offset);
- for (dim = 0;
- dim < ba_mp_geo_system->dim_count;
- dim++) {
- info("start_loc[%d]=%u geometry[%d]=%u",
- dim, start_loc[dim], dim,
- geo_table->geometry[dim]);
- }
- }
-
- bit_or(ba_mp->cnode_bitmap, jobinfo->units_used);
- jobinfo->ionode_str = ba_node_map_ranged_hostlist(
- jobinfo->units_used, ba_mp_geo_system);
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) {
- bit_not(ba_mp->cnode_bitmap);
- tmp_char = ba_node_map_ranged_hostlist(
- ba_mp->cnode_bitmap, ba_mp_geo_system);
- bit_not(ba_mp->cnode_bitmap);
- info("ba_pick_sub_block_cnodes: "
- "using cnodes %s on mp %s "
- "leaving '%s' usable in this block (%s)",
- jobinfo->ionode_str,
- ba_mp->coord_str, tmp_char,
- bg_record->bg_block_id);
- xfree(tmp_char);
- }
- for (dim = 0; dim < jobinfo->dim_cnt; dim++) {
- jobinfo->geometry[dim] =
- geo_table->geometry[dim];
- jobinfo->start_loc[dim] = start_loc[dim];
- }
- break;
- }
-
- if (geo_table)
+ if ((jobinfo->units_used = _find_sub_block(
+ &geo_table, start_loc, total_bitmap, *node_count)))
break;
- /* User asked for a bad CPU count or we can't place it
- here in this small allocation. */
- if (jobinfo->cnode_cnt < bg_conf->mp_cnode_cnt) {
- list_iterator_destroy(itr);
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("We couldn't place a sub block of %d",
- *node_count);
- (*node_count)++;
- goto try_again;
- }
+ clear_cnt = bit_clear_count(total_bitmap);
+
+ FREE_NULL_BITMAP(total_bitmap);
/* Grab the most empty midplane to be used later if we
can't find a spot.
*/
- if (max_clear_cnt < clear_cnt) {
+ if (max_clear_cnt < clear_cnt)
max_clear_cnt = clear_cnt;
- }
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
info("couldn't place it on %s", ba_mp->coord_str);
geo_table = ba_mp_geo_system->geo_table_ptr[*node_count];
}
@@ -1081,24 +1180,63 @@
because they can't get that geo and if they would of asked
for 16 then they could run we do that for them.
*/
- if (!ba_mp && (max_clear_cnt > (*node_count)+1)) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("trying with a larger size");
- (*node_count)++;
- goto try_again;
+ if (!ba_mp) {
+ if (max_clear_cnt > (*node_count)+1) {
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO)
+ info("trying with a larger size");
+ (*node_count)++;
+ goto try_again;
+ }
+ return NULL;
}
+ /* SUCCESS! */
+
+ /* Since we use conn_type as the relative start point, if the
+ block uses more than 1 midplane we need to give the
+ relative start point a boost when we go to a different midplane.
+ */
+ memset(jobinfo->conn_type, 0, sizeof(jobinfo->conn_type));
+ for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
+ jobinfo->conn_type[dim] = _find_distance(
+ bg_record->start[dim], ba_mp->coord[dim], dim);
+
+ bit_or(ba_mp->cnode_bitmap, jobinfo->units_used);
+ jobinfo->ionode_str = ba_node_map_ranged_hostlist(
+ jobinfo->units_used, ba_mp_geo_system);
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO) {
+ bit_or(total_bitmap, jobinfo->units_used);
+ bit_not(total_bitmap);
+ tmp_char = ba_node_map_ranged_hostlist(
+ total_bitmap, ba_mp_geo_system);
+ info("ba_sub_block_in_record: "
+ "using cnodes %s on mp %s "
+ "leaving '%s' on this midplane "
+ "usable in this block (%s)",
+ jobinfo->ionode_str,
+ ba_mp->coord_str, tmp_char,
+ bg_record->bg_block_id);
+ xfree(tmp_char);
+ }
+
+ for (dim = 0; dim < jobinfo->dim_cnt; dim++) {
+ jobinfo->geometry[dim] =
+ geo_table->geometry[dim];
+ jobinfo->start_loc[dim] = start_loc[dim];
+ }
+ FREE_NULL_BITMAP(total_bitmap);
+
return ba_mp;
}
-extern int ba_clear_sub_block_cnodes(
+extern int ba_sub_block_in_record_clear(
bg_record_t *bg_record, struct step_record *step_ptr)
{
bitoff_t bit;
ListIterator itr = NULL;
ba_mp_t *ba_mp = NULL;
select_jobinfo_t *jobinfo = NULL;
- char *tmp_char = NULL, *tmp_char2 = NULL;
+ char *tmp_char = NULL, *tmp_char2 = NULL, *tmp_char3 = NULL;
xassert(bg_record);
xassert(step_ptr);
@@ -1113,7 +1251,7 @@
return SLURM_SUCCESS;
if ((bit = bit_ffs(step_ptr->step_node_bitmap)) == -1) {
- error("ba_clear_sub_block_cnodes: "
+ error("ba_sub_block_in_record_clear: "
"we couldn't find any bits set");
return SLURM_ERROR;
}
@@ -1124,36 +1262,184 @@
continue;
if (!jobinfo->units_used) {
/* from older version of slurm */
- error("ba_clear_sub_block_cnodes: "
+ error("ba_sub_block_in_record_clear: "
"didn't have the units_used bitmap "
"for some reason?");
- continue;
+ break;
+ } else if (!ba_mp->cnode_bitmap) {
+ /* If the job allocation has already finished
+ before processing the job step completion
+ this could happen, but it should already be
+ checked before it gets here so this should
+ never happen, this is just for safely sake.
+ */
+ error("ba_sub_block_in_record_clear: no cnode_bitmap? "
+ "job %u(%p) is in state %s on block %s %u(%p). "
+ "This should never happen.",
+ step_ptr->job_ptr->job_id, step_ptr->job_ptr,
+ job_state_string(step_ptr->job_ptr->job_state
+ & (~JOB_CONFIGURING)),
+ bg_record->bg_block_id, bg_record->job_running,
+ bg_record->job_ptr);
+ break;
}
bit_not(jobinfo->units_used);
bit_and(ba_mp->cnode_bitmap, jobinfo->units_used);
- if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) {
+ if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_ALGO) {
+ bitstr_t *total_bitmap = bit_copy(ba_mp->cnode_bitmap);
+ if (ba_mp->cnode_err_bitmap) {
+ bit_or(total_bitmap, ba_mp->cnode_err_bitmap);
+ tmp_char3 = ba_node_map_ranged_hostlist(
+ ba_mp->cnode_err_bitmap,
+ ba_mp_geo_system);
+ }
+
bit_not(jobinfo->units_used);
tmp_char = ba_node_map_ranged_hostlist(
jobinfo->units_used, ba_mp_geo_system);
- bit_not(ba_mp->cnode_bitmap);
+ bit_not(total_bitmap);
tmp_char2 = ba_node_map_ranged_hostlist(
- ba_mp->cnode_bitmap, ba_mp_geo_system);
- bit_not(ba_mp->cnode_bitmap);
- info("ba_clear_sub_block_cnodes: "
- "cleared cnodes %s on mp %s, making '%s' usable "
- "in this block (%s)",
+ total_bitmap, ba_mp_geo_system);
+ info("ba_sub_block_in_record_clear: "
+ "cleared cnodes %s on mp %s, making '%s' "
+ "on this midplane usable in this block (%s), "
+ "%s are in Software Failure",
tmp_char, ba_mp->coord_str, tmp_char2,
- bg_record->bg_block_id);
+ bg_record->bg_block_id, tmp_char3);
xfree(tmp_char);
xfree(tmp_char2);
+ xfree(tmp_char3);
+ FREE_NULL_BITMAP(total_bitmap);
}
+ break;
}
list_iterator_destroy(itr);
return SLURM_SUCCESS;
}
+extern void ba_sync_job_to_block(bg_record_t *bg_record,
+ struct job_record *job_ptr)
+{
+ struct step_record *step_ptr;
+ ListIterator itr;
+ ba_mp_t *ba_mp;
+ select_jobinfo_t *jobinfo, *step_jobinfo;
+
+ xassert(bg_record);
+ xassert(job_ptr);
+
+ if (bg_record->job_list) {
+ if (!find_job_in_bg_record(bg_record, job_ptr->job_id)) {
+ ba_mp = list_peek(bg_record->ba_mp_list);
+ list_append(bg_record->job_list, job_ptr);
+ jobinfo = job_ptr->select_jobinfo->data;
+ /* If you were switching from no sub-block
+ allocations to allowing it, the units_avail
+ wouldn't be around for any jobs, but no
+ problem since they were always the size of
+ the block.
+ */
+ if (!jobinfo->units_avail) {
+ jobinfo->units_avail =
+ bit_copy(ba_mp->cnode_bitmap);
+ bit_not(jobinfo->units_avail);
+ }
+
+ /* Since we are syncing this information lets
+ clear out the old stuff. (You need to use
+ the jobinfo->units_avail here instead of
+ ba_mp->cnode_bitmap because the above trick
+ only works when coming from a system where
+ no sub-block allocation was allowed.)
+ */
+ FREE_NULL_BITMAP(jobinfo->units_used);
+ jobinfo->units_used = bit_copy(jobinfo->units_avail);
+ bit_not(jobinfo->units_used);
+ if (bit_overlap(ba_mp->cnode_bitmap,
+ jobinfo->units_avail)) {
+ error("we have an overlapping job allocation "
+ "(%u) mp %s", job_ptr->job_id,
+ ba_mp->coord_str);
+ }
+ bit_or(ba_mp->cnode_bitmap, jobinfo->units_avail);
+ /* info("%s now has %d left", ba_mp->coord_str, */
+ /* bit_clear_count(ba_mp->cnode_bitmap)); */
+ itr = list_iterator_create(job_ptr->step_list);
+ while ((step_ptr = list_next(itr))) {
+ step_jobinfo = step_ptr->select_jobinfo->data;
+ if (bit_overlap(jobinfo->units_used,
+ step_jobinfo->units_avail)) {
+ error("we have an overlapping step "
+ "(%u.%u) mp %s", job_ptr->job_id,
+ step_ptr->step_id,
+ ba_mp->coord_str);
+ }
+ bit_or(jobinfo->units_used,
+ step_jobinfo->units_avail);
+ /* info("allocation %u now has %d left", */
+ /* job_ptr->job_id, */
+ /* bit_clear_count(jobinfo->units_used));*/
+ }
+ list_iterator_destroy(itr);
+ }
+ } else {
+ ListIterator ba_itr = NULL;
+
+ bg_record->job_running = job_ptr->job_id;
+ bg_record->job_ptr = job_ptr;
+
+ itr = list_iterator_create(job_ptr->step_list);
+ while ((step_ptr = list_next(itr))) {
+ struct node_record *node_ptr;
+ int node_inx;
+
+ jobinfo = step_ptr->select_jobinfo->data;
+ if (jobinfo->cnode_cnt == bg_record->cnode_cnt)
+ continue;
+
+ if (!ba_itr)
+ ba_itr = list_iterator_create(
+ bg_record->ba_mp_list);
+ else
+ list_iterator_reset(ba_itr);
+
+ if (!(node_ptr = find_node_record(
+ step_ptr->step_layout->node_list))) {
+ error("can't find midplane %s",
+ step_ptr->step_layout->node_list);
+ continue;
+ }
+ node_inx = node_ptr - node_record_table_ptr;
+ while ((ba_mp = list_next(ba_itr))) {
+ if (node_inx != ba_mp->index)
+ continue;
+ if (!ba_mp->cnode_bitmap)
+ ba_mp->cnode_bitmap =
+ ba_create_ba_mp_cnode_bitmap(
+ bg_record);
+ if (!ba_mp->cnode_err_bitmap)
+ ba_mp->cnode_err_bitmap = bit_alloc(
+ bg_conf->mp_cnode_cnt);
+ if (bit_overlap(ba_mp->cnode_bitmap,
+ jobinfo->units_used)) {
+ error("we have an overlapping step "
+ "(%u.%u) mp %s", job_ptr->job_id,
+ step_ptr->step_id,
+ ba_mp->coord_str);
+ }
+ bit_or(ba_mp->cnode_bitmap,
+ jobinfo->units_used);
+ break;
+ }
+ }
+ list_iterator_destroy(itr);
+ if (ba_itr)
+ list_iterator_destroy(ba_itr);
+ }
+}
+
extern bitstr_t *ba_create_ba_mp_cnode_bitmap(bg_record_t *bg_record)
{
int start, end, ionode_num;
@@ -1197,362 +1483,252 @@
return cnode_bitmap;
}
-
-static int _ba_set_ionode_str_internal(int level, int *coords,
- int *start_offset, int *end_offset,
- hostlist_t hl)
+extern void ba_set_ionode_str(bg_record_t *bg_record)
{
- char tmp_char[6];
+ int ionode_num, coords[5];
+ hostlist_t hl;
+ bool set_small = 0;
- xassert(hl);
+ if (!bg_record->ionode_bitmap
+ || bit_ffs(bg_record->ionode_bitmap) == -1)
+ return;
- if (level > 5)
- return -1;
+ hl = hostlist_create_dims("", 5);
- if (level < 5) {
- for (coords[level] = start_offset[level];
- coords[level] <= end_offset[level];
- coords[level]++) {
- /* handle the outter dims here */
+ for (ionode_num = bit_ffs(bg_record->ionode_bitmap);
+ ionode_num <= bit_fls(bg_record->ionode_bitmap);
+ ionode_num++) {
+ int nc_num, nc_start, nc_end;
+
+ if (!bit_test(bg_record->ionode_bitmap, ionode_num))
+ continue;
+
+ nc_start = ionode_num * (int)bg_conf->nc_ratio;
+
+ if (!set_small) {
+ int dim;
+ set_small = 1;
+ for (dim = 0; dim<5; dim++)
+ bg_record->start_small[dim] =
+ g_nc_coords[nc_start].start[dim];
+ }
+
+ nc_end = nc_start + (int)bg_conf->nc_ratio;
+
+ for (nc_num = nc_start; nc_num < nc_end; nc_num++) {
if (_ba_set_ionode_str_internal(
- level+1, coords,
- start_offset, end_offset,
- hl) == -1)
- return -1;
- }
- return 1;
- }
- snprintf(tmp_char, sizeof(tmp_char), "%c%c%c%c%c",
- alpha_num[coords[0]],
- alpha_num[coords[1]],
- alpha_num[coords[2]],
- alpha_num[coords[3]],
- alpha_num[coords[4]]);
- hostlist_push_host_dims(hl, tmp_char, 5);
- return 1;
-}
-
-extern char *ba_set_ionode_str(bitstr_t *ionode_bitmap)
-{
- char *ionode_str = NULL;
-
- if (ionode_bitmap) {
- /* bit_fmt(bitstring, BITSIZE, ionode_bitmap); */
- /* return xstrdup(bitstring); */
- int ionode_num;
- hostlist_t hl = hostlist_create_dims("", 5);
- int coords[5];
-
- for (ionode_num = bit_ffs(ionode_bitmap);
- ionode_num <= bit_fls(ionode_bitmap);
- ionode_num++) {
- int nc_num, nc_start, nc_end;
- if (!bit_test(ionode_bitmap, ionode_num))
- continue;
-
- nc_start = ionode_num * (int)bg_conf->nc_ratio;
- nc_end = nc_start + (int)bg_conf->nc_ratio;
-
- for (nc_num = nc_start; nc_num < nc_end; nc_num++) {
- if (_ba_set_ionode_str_internal(
- 0, coords,
- g_nc_coords[nc_num].start,
- g_nc_coords[nc_num].end,
- hl)
- == -1) {
- hostlist_destroy(hl);
- hl = NULL;
- break;
- }
+ 0, coords,
+ g_nc_coords[nc_num].start,
+ g_nc_coords[nc_num].end,
+ hl)
+ == -1) {
+ hostlist_destroy(hl);
+ hl = NULL;
+ return;
}
}
- if (hl) {
- ionode_str = hostlist_ranged_string_xmalloc_dims(
- hl, 5, 0);
- //info("iostring is %s", ionode_str);
- hostlist_destroy(hl);
- hl = NULL;
- }
}
- return ionode_str;
+
+ bg_record->ionode_str = hostlist_ranged_string_xmalloc_dims(hl, 5, 0);
+ //info("iostring is %s", bg_record->ionode_str);
+ hostlist_destroy(hl);
+ hl = NULL;
}
-/*
- * This function is here to check options for rotating and elongating
- * and set up the request based on the count of each option
- */
-static int _check_for_options(select_ba_request_t* ba_request)
+/* Check to see if a job has been added to the bg_record NO_VAL
+ * returns the first one on the list. */
+extern struct job_record *ba_remove_job_in_block_job_list(
+ bg_record_t *bg_record, struct job_record *in_job_ptr)
{
- ba_geo_table_t *ba_geo_table;
+ ListIterator itr;
+ struct job_record *job_ptr = NULL;
+ select_jobinfo_t *jobinfo;
+ ba_mp_t *ba_mp;
+ char *tmp_char = NULL, *tmp_char2 = NULL, *tmp_char3 = NULL;
+ bool bad_magic = 0;
+ bitstr_t *used_cnodes = NULL;
- if (ba_request->geo_table) {
- ba_geo_table = ba_request->geo_table;
- ba_request->geo_table = ba_geo_table->next_ptr;
- }
+ xassert(bg_record);
- if (ba_request->geo_table) {
- ba_geo_table = ba_request->geo_table;
- memcpy(ba_request->geometry, ba_geo_table->geometry,
- sizeof(ba_geo_table->geometry));
- /* info("now trying %c%c%c%c", */
- /* alpha_num[ba_request->geometry[A]], */
- /* alpha_num[ba_request->geometry[X]], */
- /* alpha_num[ba_request->geometry[Y]], */
- /* alpha_num[ba_request->geometry[Z]]); */
- return 1;
- }
- return 0;
-}
+ if (!bg_record->job_list)
+ return NULL;
-/*
- * Fill in the paths and extra midplanes we need for the block.
- * Basically copy the starting coords sent in starting at block_start
- * ending with block_end in every midplane for the block. This
- * function does not finish torus' (use _finish_torus for that).
- *
- * IN/OUT results - total list of midplanes after this function
- * returns successfully.
- * IN level - which dimension we are on. Since this is a recursive
- * function calls to this function should always be 'A' when
- * starting.
- * IN start_mp - starting location of the block, should be the ba_mp
- * from the block_start.
- * IN block_start - starting point of the block.
- * IN block_end - ending point of the block.
- * IN coords - Where we are recursively. So this should just be an
- * uninitialized int [SYSTEM_DIMENSIONS]
- *
- * RET: -1 on failure 1 on success
- */
-static int _fill_in_coords(List results, int level, ba_mp_t *start_mp,
- ba_mp_t **check_mp, int *block_start,
- int *block_end, int *coords)
-{
- int dim;
- int count_outside = 0;
- uint16_t used = 0;
- ba_mp_t *curr_mp;
+ ba_mp = list_peek(bg_record->ba_mp_list);
+ xassert(ba_mp);
- if (level > cluster_dims)
- return -1;
-
- if (level < cluster_dims) {
- check_mp[level] = start_mp;
- coords[level] = start_mp->coord[level];
- do {
- /* handle the outter dims here */
- if (_fill_in_coords(
- results, level+1, start_mp,
- check_mp, block_start, block_end,
- coords) == -1)
- return -1;
- if (check_mp[level]->alter_switch[level].usage
- & BG_SWITCH_OUT_PASS)
- check_mp[level] =
- check_mp[level]->next_mp[level];
- else {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("mp %s(%d) isn't connected "
- "anymore, we found the end.",
- check_mp[level]->coord_str, level);
- return 0;
- }
- if (coords[level] < (DIM_SIZE[level]-1))
- coords[level]++;
- else
- coords[level] = 0;
- } while (coords[level] != start_mp->coord[level]);
- return 1;
- }
-
- curr_mp = &ba_main_grid[coords[A]][coords[X]][coords[Y]][coords[Z]];
-
- /* info("looking at %s", curr_mp->coord_str); */
- for (dim=0; dim<cluster_dims; dim++) {
- /* If this is only used for passthrough, skip since
- the _finish_torus code will catch things there.
+ if (in_job_ptr && in_job_ptr->magic != JOB_MAGIC) {
+ /* This can happen if the mmcs job hangs out in the system
+ * forever, or at least gets cleared a after the SLURM
+ * job is out of the controller.
+ */
+ bad_magic = 1;
+ used_cnodes = bit_copy(ba_mp->cnode_bitmap);
+ /* Take out the part (if any) of the midplane that
+ isn't part of the block.
*/
- if (check_mp[dim]->used & BA_MP_USED_PASS_BIT) {
- used = check_mp[dim]->used;
+ bit_not(ba_mp->cnode_usable_bitmap);
+ bit_and(used_cnodes, ba_mp->cnode_usable_bitmap);
+ bit_not(ba_mp->cnode_usable_bitmap);
+ }
+again:
+ itr = list_iterator_create(bg_record->job_list);
+ while ((job_ptr = list_next(itr))) {
+ if (job_ptr->magic != JOB_MAGIC) {
+ error("on block %s we found a job with bad magic",
+ bg_record->bg_block_id);
+ list_delete_item(itr);
+ continue;
+ } else if (bad_magic) {
+ jobinfo = job_ptr->select_jobinfo->data;
+ if (!jobinfo->units_avail) {
+ error("ba_remove_job_in_block_job_list: "
+ "no units avail bitmap on the jobinfo, "
+ "continuing");
+ continue;
+ }
+ bit_not(jobinfo->units_avail);
+ bit_and(used_cnodes, jobinfo->units_avail);
+ bit_not(jobinfo->units_avail);
+
+ continue;
+ }
+
+ if (!in_job_ptr) {
+ /* if there is not an in_job_ptr it is because
+ the jobs finished while the slurmctld
+ wasn't running and somehow the state was
+ messed up. So the cpus were never added to
+ the mix, so don't remove them. This should
+ probably never happen.
+ */
+ //num_unused_cpus += job_ptr->total_cpus;
+ list_delete_item(itr);
+ continue;
+ }
+
+ if (job_ptr == in_job_ptr) {
+ num_unused_cpus += job_ptr->total_cpus;
+ list_delete_item(itr);
break;
}
+ }
+ list_iterator_destroy(itr);
- /* info("inside at %s %d %d %d", check_mp[dim]->coord_str, */
- /* dim, check_mp[dim]->used, used); */
-
- /* If we get over 2 in any dim that we are
- greater here we are pass anything we need to
- passthrough, so break.
+ if (!in_job_ptr) {
+ if (ba_mp->cnode_usable_bitmap) {
+ FREE_NULL_BITMAP(ba_mp->cnode_bitmap);
+ ba_mp->cnode_bitmap =
+ bit_copy(ba_mp->cnode_usable_bitmap);
+ } else if (ba_mp->cnode_bitmap)
+ bit_nclear(ba_mp->cnode_bitmap, 0,
+ bit_size(ba_mp->cnode_bitmap)-1);
+ return NULL;
+ } else if (!job_ptr && !bad_magic) {
+ /* If the job was not found reset the block with the
+ running jobs and go from there.
*/
-
- /* info("passthrough %d used %d %d %d %d", dim, used, */
- /* curr_mp->coord[dim], block_start[dim], */
- /* block_end[dim]); */
- if ((curr_mp->coord[dim] < block_start[dim])
- || (curr_mp->coord[dim] > block_end[dim])) {
- count_outside++;
- /* info("yes under %d", count_outside); */
- if (count_outside > 1)
- break;
+ if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
+ error("ba_remove_job_in_block_job_list: "
+ "Couldn't remove sub-block job %u from "
+ "block %s",
+ in_job_ptr->job_id, bg_record->bg_block_id);
}
+ bad_magic = 1;
+ used_cnodes = bit_copy(ba_mp->cnode_bitmap);
+ goto again;
}
- /* info("got used of %d %d", used, count_outside); */
- if (dim < cluster_dims) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("skipping non-used %s if needed for "
- "passthrough it should be handled in "
- "_finish_torus",
- curr_mp->coord_str);
- return 1;
- }
+ if (bad_magic) {
+ uint32_t current_cnode_cnt = bit_set_count(used_cnodes);
- for (dim=0; dim<cluster_dims; dim++) {
- int rc;
+ num_unused_cpus += current_cnode_cnt * bg_conf->cpu_ratio;
- /* If we are passing though skip all except the
- actual passthrough dim.
- */
- if ((used & BA_MP_USED_PASS_BIT)
- && (check_mp[dim]->used != used)) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("skipping here %s(%d)",
- curr_mp->coord_str, dim);
- continue;
+ bit_not(used_cnodes);
+ bit_and(ba_mp->cnode_bitmap, used_cnodes);
+ bit_not(used_cnodes);
+ if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
+ debug("ba_remove_job_in_block_job_list: "
+ "Removing old sub-block job using %d cnodes "
+ "from block %s",
+ current_cnode_cnt, bg_record->bg_block_id);
+ }
+ } else {
+ if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
+ debug("ba_remove_job_in_block_job_list: "
+ "Removing sub-block job %u from block %s",
+ job_ptr->job_id, bg_record->bg_block_id);
}
- /* ba_mp_t *orig_mp = check_mp[dim]; */
- /* ba_mp_t *ba_mp = curr_mp; */
- /* info("looking to put " */
- /* "mp %s(%d) %s onto mp %s(%d) %s", */
- /* orig_mp->coord_str, dim, */
- /* ba_switch_usage_str(orig_mp->alter_switch[dim].usage),*/
- /* ba_mp->coord_str, dim, */
- /* ba_switch_usage_str(ba_mp->alter_switch[dim].usage)); */
+ jobinfo = job_ptr->select_jobinfo->data;
- /* if 1 is returned we haven't visited this mp yet,
- and need to add it to the list
- */
- if ((rc = _copy_ba_switch(curr_mp, check_mp[dim], dim)) == -1)
- return rc;
- else if (rc == 1)
- list_append(results, curr_mp);
+ if (!jobinfo->units_avail) {
+ error("ba_remove_job_in_block_job_list: "
+ "no units avail bitmap on the jobinfo");
+ return job_ptr;
+ }
+ used_cnodes = jobinfo->units_avail;
}
- return 1;
+
+ bit_not(used_cnodes);
+ bit_and(ba_mp->cnode_bitmap, used_cnodes);
+ bit_not(used_cnodes);
+
+ if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_ALGO) {
+ bitstr_t *total_bitmap = bit_copy(ba_mp->cnode_bitmap);
+ if (ba_mp->cnode_err_bitmap) {
+ bit_or(total_bitmap, ba_mp->cnode_err_bitmap);
+ tmp_char3 = ba_node_map_ranged_hostlist(
+ ba_mp->cnode_err_bitmap,
+ ba_mp_geo_system);
+ }
+
+ tmp_char = ba_node_map_ranged_hostlist(
+ used_cnodes, ba_mp_geo_system);
+ bit_not(total_bitmap);
+ tmp_char2 = ba_node_map_ranged_hostlist(
+ total_bitmap, ba_mp_geo_system);
+ info("ba_remove_job_in_block_job_list: "
+ "cleared cnodes %s on mp %s, making '%s' "
+ "on this midplane usable in this block (%s), "
+ "%s are in Software Failure",
+ tmp_char, ba_mp->coord_str, tmp_char2,
+ bg_record->bg_block_id, tmp_char3);
+ xfree(tmp_char);
+ xfree(tmp_char2);
+ xfree(tmp_char3);
+ FREE_NULL_BITMAP(total_bitmap);
+ }
+
+ if (bad_magic)
+ FREE_NULL_BITMAP(used_cnodes);
+
+ return job_ptr;
}
-/*
- * Finish wiring a block together given start and end points. All
- * used nodes should be marked inside those points before this
- * function is called.
- *
- * IN/OUT results - total list of midplanes after this function
- * returns successfully.
- * IN level - which dimension we are on. Since this is a recursive
- * function calls to this function should always be 'A' when
- * starting.
- * IN block_start - starting point of the block.
- * IN block_end - ending point of the block.
- * IN conn_type - Mesh or Torus for each Dim.
- * IN coords - Where we are recursively. So this should just be an
- * uninitialized int [SYSTEM_DIMENSIONS]
- *
- * RET: -1 on failure 1 on success
- */
-static int _finish_torus(List results, int level, int *block_start,
- int *block_end, uint16_t *conn_type, int *coords)
+extern int ba_translate_coord2nc(uint16_t *cnode_coords)
{
- int dim;
- ba_mp_t *curr_mp, *start_mp;
-
- if (level > cluster_dims)
- return -1;
-
- if (level < cluster_dims) {
- for (coords[level] = block_start[level];
- coords[level] <= block_end[level];
- coords[level]++) {
- /* handle the outter dims here */
- if (_finish_torus(
- results, level+1,
- block_start, block_end,
- conn_type, coords) == -1)
- return -1;
+ int nc_loc, dim, match;
+ /* need to figure out which nodeboard this cnode is in */
+ for (nc_loc=0; nc_loc<16; nc_loc++) {
+ match = 0;
+ for (dim = 0; dim < 5; dim++) {
+ if ((cnode_coords[dim]
+ >= g_nc_coords[nc_loc].start[dim])
+ && (cnode_coords[dim]
+ <= g_nc_coords[nc_loc].end[dim]))
+ match++;
}
- return 1;
+ if (match == 5)
+ break;
}
+ xassert(nc_loc < 16);
+ return nc_loc;
+}
- curr_mp = &ba_main_grid[coords[A]][coords[X]][coords[Y]][coords[Z]];
- if (!(curr_mp->used & BA_MP_USED_ALTERED)) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_finish_torus: skipping non-used %s",
- curr_mp->coord_str);
- return 1;
- }
- start_mp = curr_mp;
-
- /* info("_finish_torus: starting with %s", */
- /* curr_mp->coord_str); */
-
- for (dim=0; dim<cluster_dims; dim++) {
- if (conn_type[dim] != SELECT_TORUS)
- continue;
- if (!(start_mp->alter_switch[dim].usage & BG_SWITCH_OUT_PASS)) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("finish_torus: mp %s(%d) already "
- "terminated %s",
- curr_mp->coord_str, dim,
- ba_switch_usage_str(
- start_mp->alter_switch->usage));
- continue;
- }
-
- curr_mp = start_mp->next_mp[dim];
- while (curr_mp != start_mp) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_finish_torus: looking at %s(%d)",
- curr_mp->coord_str, dim);
- if (!(curr_mp->used & BA_MP_USED_ALTERED)) {
- ba_switch_t *axis_switch =
- &curr_mp->axis_switch[dim];
- ba_switch_t *alter_switch =
- &curr_mp->alter_switch[dim];
- if (axis_switch->usage & BG_SWITCH_PASS_USED) {
- info("_finish_torus: got a bad "
- "axis_switch at "
- "%s(%d) %s %s",
- curr_mp->coord_str, dim,
- ba_switch_usage_str(
- axis_switch->usage),
- ba_switch_usage_str(
- alter_switch->usage));
- xassert(0);
- }
- alter_switch->usage |= BG_SWITCH_PASS;
- curr_mp->used |= BA_MP_USED_ALTERED_PASS;
- list_append(results, curr_mp);
-
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_finish_torus: using mp %s(%d) "
- "to finish torus %s added %s",
- curr_mp->coord_str, dim,
- ba_switch_usage_str(
- axis_switch->usage),
- ba_switch_usage_str(
- alter_switch->usage));
- } else if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_finish_torus: skipping already "
- "set %s(%d) %s",
- curr_mp->coord_str, dim,
- ba_switch_usage_str(
- curr_mp->alter_switch[dim].usage));
- curr_mp = curr_mp->next_mp[dim];
- }
- /* info("_finish_torus: ended with %s(%d)", */
- /* curr_mp->coord_str, dim); */
- }
-
- return 1;
+/* ba_system_mutex needs to be locked before calling this. */
+extern ba_mp_t *ba_inx2ba_mp(int inx)
+{
+ return ba_main_grid_array[inx];
}
static char *_copy_from_main(List main_mps, List ret_list)
@@ -1682,92 +1858,6 @@
return name;
}
-static int _copy_ba_switch(ba_mp_t *ba_mp, ba_mp_t *orig_mp, int dim)
-{
- int rc = 0;
- if (ba_mp->alter_switch[dim].usage != BG_SWITCH_NONE) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_copy_ba_switch: "
- "switch already set %s(%d)",
- ba_mp->coord_str, dim);
- return 0;
- }
-
- if (orig_mp->alter_switch[dim].usage == BG_SWITCH_NONE) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_copy_ba_switch: "
- "switch not needed %s(%d)",
- ba_mp->coord_str, dim);
- return 0;
- }
-
- if ((orig_mp->used & BA_MP_USED_PASS_BIT)
- || (ba_mp->used & BA_MP_USED_PASS_BIT)) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_copy_ba_switch: "
- "pass bit set %d %d",
- orig_mp->alter_switch[dim].usage
- & BG_SWITCH_PASS_FLAG,
- ba_mp->alter_switch[dim].usage
- & BG_SWITCH_PASS_FLAG);
- if (!(orig_mp->alter_switch[dim].usage & BG_SWITCH_PASS_FLAG)) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_copy_ba_switch: "
- "skipping %s(%d)", ba_mp->coord_str, dim);
- return 0;
- }
- } else if (_mp_used(ba_mp, dim)) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_copy_ba_switch: "
- "%s is already used", ba_mp->coord_str);
- return -1;
- }
-
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_copy_ba_switch: "
- "mapping %s(%d) %s to %s(%d) %s",
- orig_mp->coord_str, dim,
- ba_switch_usage_str(orig_mp->alter_switch[dim].usage),
- ba_mp->coord_str, dim,
- ba_switch_usage_str(ba_mp->alter_switch[dim].usage));
-
- if (ba_mp->axis_switch[dim].usage & orig_mp->alter_switch[dim].usage) {
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_copy_ba_switch: "
- "can't use %s(%d) switch %s "
- "overlapped with request %s",
- ba_mp->coord_str, dim,
- ba_switch_usage_str(
- ba_mp->axis_switch[dim].usage),
- ba_switch_usage_str(
- orig_mp->alter_switch[dim].usage));
- return -1;
- }
-
- /* If we return 1 it means we haven't yet looked at this
- * midplane so add it to the list */
- if (!(ba_mp->used & BA_MP_USED_ALTERED))
- rc = 1;
-
- /* set up the usage of the midplane */
- if (orig_mp->used & BA_MP_USED_PASS_BIT)
- ba_mp->used |= BA_MP_USED_ALTERED_PASS;
- else
- ba_mp->used |= BA_MP_USED_ALTERED;
-
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_copy_ba_switch: "
- "mp %s(%d) adds %s to mp %s(%d) %s %d",
- orig_mp->coord_str, dim,
- ba_switch_usage_str(orig_mp->alter_switch[dim].usage),
- ba_mp->coord_str, dim,
- ba_switch_usage_str(ba_mp->alter_switch[dim].usage),
- ba_mp->used);
- ba_mp->alter_switch[dim].usage |= orig_mp->alter_switch[dim].usage;
-
- return rc;
-}
-
static int _check_deny_pass(int dim)
{
if (!deny_pass || !*deny_pass)
@@ -1810,9 +1900,9 @@
return 0;
}
-static int _find_path(List mps, ba_mp_t *start_mp, int dim,
- uint16_t geometry, uint16_t conn_type,
- int *block_start, int *block_end)
+static int _fill_in_wires(List mps, ba_mp_t *start_mp, int dim,
+ uint16_t geometry, uint16_t conn_type,
+ bool full_check)
{
ba_mp_t *curr_mp = start_mp->next_mp[dim];
ba_switch_t *axis_switch = NULL;
@@ -1821,33 +1911,18 @@
int add = 0;
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_find_path: at mp %s(%d) geo %d switches at %s and %s",
+ info("_fill_in_wires: at mp %s(%d) geo %d switches "
+ "at %s and %s",
start_mp->coord_str, dim, geometry,
ba_switch_usage_str(start_mp->axis_switch[dim].usage),
ba_switch_usage_str(start_mp->alter_switch[dim].usage));
- if (_mp_used(start_mp, dim))
- return 0;
-
axis_switch = &start_mp->axis_switch[dim];
alter_switch = &start_mp->alter_switch[dim];
- if (geometry == 1) {
- /* Always check MESH here since we only care about the
- IN/OUT ports.
- */
- start_mp->used |= BA_MP_USED_ALTERED;
- /* all 1 dimensions need a TORUS */
- alter_switch->usage |= BG_SWITCH_WRAPPED;
- if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_find_path: using mp %s(%d) in 1 geo %s added %s",
- start_mp->coord_str, dim,
- ba_switch_usage_str(axis_switch->usage),
- ba_switch_usage_str(alter_switch->usage));
- return 1;
- }
+
if (_mp_out_used(start_mp, dim))
return 0;
- start_mp->used |= BA_MP_USED_ALTERED;
+
alter_switch->usage |= BG_SWITCH_OUT;
alter_switch->usage |= BG_SWITCH_OUT_PASS;
@@ -1860,14 +1935,16 @@
/* This should never happen since we got here
from an unused mp */
if (axis_switch->usage & BG_SWITCH_IN_PASS) {
- info("_find_path: got a bad axis_switch at %s %d %s %s",
+ info("_fill_in_wires: got a bad axis_switch "
+ "at %s %d %s %s",
curr_mp->coord_str, dim,
ba_switch_usage_str(axis_switch->usage),
ba_switch_usage_str(alter_switch->usage));
xassert(0);
}
- if ((count < geometry) && !_mp_used(curr_mp, dim)) {
+ if ((count < geometry)
+ && (curr_mp->used & BA_MP_USED_ALTERED)) {
/* if (curr_mp->coord[dim] < start_mp->coord[dim]) { */
/* if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) */
/* info("Available mp %s(%d) is less " */
@@ -1878,23 +1955,16 @@
/* start_mp->coord_str, dim); */
/* return 0; */
/* } */
- if (curr_mp->coord[dim] < *block_start)
- *block_start = curr_mp->coord[dim];
-
- if (curr_mp->coord[dim] > *block_end)
- *block_end = curr_mp->coord[dim];
count++;
- if (!(curr_mp->used & BA_MP_USED_ALTERED)) {
- add = 1;
- curr_mp->used |= BA_MP_USED_ALTERED;
- }
alter_switch->usage |= BG_SWITCH_IN_PASS;
alter_switch->usage |= BG_SWITCH_IN;
if ((count < geometry) || (conn_type == SELECT_TORUS)) {
+ if (_mp_out_used(curr_mp, dim))
+ return 0;
alter_switch->usage |= BG_SWITCH_OUT;
alter_switch->usage |= BG_SWITCH_OUT_PASS;
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_find_path: using mp %s(%d) "
+ info("_fill_in_wires: using mp %s(%d) "
"%d(%d) %s added %s",
curr_mp->coord_str, dim,
count, geometry,
@@ -1904,7 +1974,7 @@
alter_switch->usage));
} else if (conn_type == SELECT_MESH) {
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
- info("_find_path: using mp %s(%d) "
+ info("_fill_in_wires: using mp %s(%d) "
"%d(%d) %s added %s",
curr_mp->coord_str, dim,
count, geometry,
@@ -1912,29 +1982,42 @@
axis_switch->usage),
ba_switch_usage_str(
alter_switch->usage));
- if (add)
- list_append(mps, curr_mp);
return 1;
}
} else if (!_mp_out_used(curr_mp, dim)
&& !_check_deny_pass(dim)) {
+
+ if (!full_check
+ && bridge_check_nodeboards(curr_mp->loc)) {
+ if (ba_debug_flags
+ & DEBUG_FLAG_BG_ALGO_DEEP) {
+ info("_fill_in_wires: can't "
+ "use mp %s(%d) "
+ "as passthrough it has "
+ "nodeboards not available",
+ curr_mp->coord_str, dim);
+ }
+ return 0;
+ }
if (!(curr_mp->used & BA_MP_USED_ALTERED)) {
add = 1;
curr_mp->used |= BA_MP_USED_ALTERED_PASS;
+ } else {
+ error("WHAT? %s", curr_mp->coord_str);
}
alter_switch->usage |= BG_SWITCH_PASS;
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) {
if (count == geometry) {
- info("_find_path: using mp %s(%d) to "
- "finish torus %s added %s",
+ info("_fill_in_wires: using mp %s(%d) "
+ "to finish torus %s added %s",
curr_mp->coord_str, dim,
ba_switch_usage_str(
axis_switch->usage),
ba_switch_usage_str(
alter_switch->usage));
} else {
- info("_find_path: using mp %s(%d) as "
- "passthrough %s added %s",
+ info("_fill_in_wires: using mp %s(%d) "
+ "as passthrough %s added %s",
curr_mp->coord_str, dim,
ba_switch_usage_str(
axis_switch->usage),
@@ -1944,7 +2027,7 @@
}
} else {
/* we can't use this so return with a nice 0 */
- info("_find_path: we can't use this so return");
+ info("_fill_in_wires: we can't use this so return");
return 0;
}
@@ -1962,7 +2045,8 @@
/* This should never happen since we got here
from an unused mp */
if (axis_switch->usage & BG_SWITCH_IN_PASS) {
- info("_find_path: 2 got a bad axis_switch at %s %d %s",
+ info("_fill_in_wires: 2 got a bad axis_switch "
+ "at %s %d %s",
curr_mp->coord_str, dim,
ba_switch_usage_str(axis_switch->usage));
xassert(0);
@@ -2068,8 +2152,9 @@
xassert(ba_mp);
/* If the mp is already used just check the PASS_USED. */
- if ((ba_mp->axis_switch[dim].usage & BG_SWITCH_PASS_USED)
- || (ba_mp->alter_switch[dim].usage & BG_SWITCH_PASS_USED)) {
+ if ((ba_mp->axis_switch[dim].usage & BG_SWITCH_CABLE_ERROR_SET)
+ || (ba_mp->axis_switch[dim].usage & BG_SWITCH_OUT_PASS)
+ || (ba_mp->alter_switch[dim].usage & BG_SWITCH_OUT_PASS)) {
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
info("mp %s(%d) has passthroughs used (%s)",
ba_mp->coord_str, dim, ba_switch_usage_str(
@@ -2080,3 +2165,133 @@
return false;
}
+static uint16_t _find_distance(uint16_t start, uint16_t end, int dim)
+{
+ if (end < start)
+ return (((DIM_SIZE[dim]-1) - start) + end) * 4;
+ else
+ return (end - start) * 4;
+}
+
+static int _ba_set_ionode_str_internal(int level, int *coords,
+ int *start_offset, int *end_offset,
+ hostlist_t hl)
+{
+ char tmp_char[6];
+
+ xassert(hl);
+
+ if (level > 5)
+ return -1;
+
+ if (level < 5) {
+ for (coords[level] = start_offset[level];
+ coords[level] <= end_offset[level];
+ coords[level]++) {
+ /* handle the outter dims here */
+ if (_ba_set_ionode_str_internal(
+ level+1, coords,
+ start_offset, end_offset,
+ hl) == -1)
+ return -1;
+ }
+ return 1;
+ }
+ snprintf(tmp_char, sizeof(tmp_char), "%c%c%c%c%c",
+ alpha_num[coords[0]],
+ alpha_num[coords[1]],
+ alpha_num[coords[2]],
+ alpha_num[coords[3]],
+ alpha_num[coords[4]]);
+ hostlist_push_host_dims(hl, tmp_char, 5);
+ return 1;
+}
+
+static bitstr_t *_find_sub_block(ba_geo_table_t **in_geo_table,
+ uint16_t *start_loc, bitstr_t *total_bitmap,
+ uint32_t node_count)
+{
+ int cnt = 0;
+ bitstr_t *found_bits = NULL;
+ uint32_t clear_cnt = bit_clear_count(total_bitmap);
+ ba_geo_table_t *geo_table = *in_geo_table;
+
+ if (clear_cnt < node_count) {
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
+ info("ba_pick_sub_block: only have %d avail need %d",
+ clear_cnt, node_count);
+ return NULL;
+ }
+
+ while (geo_table) {
+ int scan_offset = 0;
+
+ /* FIXME: In the current IBM API it doesn't
+ allow wrapping inside the midplane. In the
+ future this will change. When that happens
+ there will need to be a flag that is sent
+ here instead of always true.
+ */
+ if (ba_geo_test_all(total_bitmap,
+ &found_bits,
+ geo_table, &cnt,
+ ba_mp_geo_system, NULL,
+ start_loc, &scan_offset, true)
+ == SLURM_SUCCESS) {
+ if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) {
+ int dim;
+ info("scan_offset=%d", scan_offset);
+ for (dim = 0;
+ dim < ba_mp_geo_system->dim_count;
+ dim++) {
+ info("start_loc[%d]=%u geometry[%d]=%u",
+ dim, start_loc[dim], dim,
+ geo_table->geometry[dim]);
+ }
+ }
+ break;
+ }
+ geo_table = geo_table->next_ptr;
+ }
+
+ *in_geo_table = geo_table;
+
+ return found_bits;
+}
+
+static ba_geo_table_t *_find_geo_table(uint32_t orig_node_count,
+ uint32_t *node_count,
+ uint32_t total_count)
+{
+ ba_geo_table_t *geo_table = NULL;
+
+ while (!(geo_table = ba_mp_geo_system->geo_table_ptr[*node_count])) {
+ debug2("_find_geo_table: No geometries of size %u ",
+ *node_count);
+ (*node_count)++;
+ if (*node_count > total_count)
+ break;
+ }
+ if (*node_count > total_count) {
+ debug("_find_geo_table: requested sub-block larger "
+ "than block");
+ return NULL;
+ }
+
+ if (orig_node_count != *node_count)
+ debug("_find_geo_table: user requested %u nodes, "
+ "but that can't make a block, giving them %d",
+ orig_node_count, *node_count);
+
+ if (!geo_table) {
+ /* This should never happen */
+ error("_find_geo_table: "
+ "Couldn't place this job size %u tried up to "
+ "the full size of the block (%u)",
+ orig_node_count, total_count);
+ return NULL;
+ }
+
+ return geo_table;
+}
+
diff --git a/src/plugins/select/bluegene/ba_bgq/block_allocator.h b/src/plugins/select/bluegene/ba_bgq/block_allocator.h
index 134e80b..337dc3b 100644
--- a/src/plugins/select/bluegene/ba_bgq/block_allocator.h
+++ b/src/plugins/select/bluegene/ba_bgq/block_allocator.h
@@ -51,5 +51,9 @@
/* Global */
extern ba_mp_t ****ba_main_grid;
+extern ba_geo_system_t *ba_mp_geo_system;
+
+extern int ba_translate_coord2nc(uint16_t *cnode_coords);
+extern ba_mp_t *ba_inx2ba_mp(int inx);
#endif /* _BLOCK_ALLOCATOR_H_ */
diff --git a/src/plugins/select/bluegene/ba_bgq/wire_test.c b/src/plugins/select/bluegene/ba_bgq/wire_test.c
index 04a836a..d33238a 100644
--- a/src/plugins/select/bluegene/ba_bgq/wire_test.c
+++ b/src/plugins/select/bluegene/ba_bgq/wire_test.c
@@ -58,6 +58,7 @@
bg_lists_t *bg_lists;
pthread_mutex_t block_state_mutex = PTHREAD_MUTEX_INITIALIZER;
int bg_recover = 1;
+int num_unused_cpus = 0;
extern int bridge_init(char *properties_file)
{
@@ -79,6 +80,17 @@
return SLURM_ERROR;
}
+extern struct job_record *find_job_in_bg_record(bg_record_t *bg_record,
+ uint32_t job_id)
+{
+ return NULL;
+}
+
+extern int bridge_check_nodeboards(char *mp_loc)
+{
+ return 0;
+}
+
/** */
int main(int argc, char** argv)
{
diff --git a/src/plugins/select/bluegene/ba_common.c b/src/plugins/select/bluegene/ba_common.c
index 944a0c2..d868ae1 100644
--- a/src/plugins/select/bluegene/ba_common.c
+++ b/src/plugins/select/bluegene/ba_common.c
@@ -57,6 +57,7 @@
uint32_t ba_debug_flags = 0;
int DIM_SIZE[HIGHEST_DIMENSIONS];
bitstr_t *ba_main_mp_bitmap = NULL;
+pthread_mutex_t ba_system_mutex = PTHREAD_MUTEX_INITIALIZER;
static void _pack_ba_connection(ba_connection_t *ba_connection,
Buf buffer, uint16_t protocol_version)
@@ -145,24 +146,6 @@
return false;
}
-/* Translate a multi-dimension coordinate (3-D, 4-D, 5-D, etc.) into a 1-D
- * offset in the cnode* bitmap */
-static void _ba_node_xlate_to_1d(int *offset_1d, int *full_offset,
- ba_geo_system_t *my_geo_system)
-{
- int i, map_offset;
-
- xassert(offset_1d);
- xassert(full_offset);
- i = my_geo_system->dim_count - 1;
- map_offset = full_offset[i];
- for (i-- ; i >= 0; i--) {
- map_offset *= my_geo_system->dim_size[i];
- map_offset += full_offset[i];
- }
- *offset_1d = map_offset;
-}
-
#if DISPLAY_FULL_DIM
/* Translate a 1-D offset in the cnode bitmap to a multi-dimension
* coordinate (3-D, 4-D, 5-D, etc.) */
@@ -180,13 +163,11 @@
}
#endif
-static int _ba_node_map_set_range_internal(int level, int *coords,
+static int _ba_node_map_set_range_internal(int level, uint16_t *coords,
int *start_offset, int *end_offset,
bitstr_t *node_bitmap,
ba_geo_system_t *my_geo_system)
{
- int offset_1d;
-
xassert(my_geo_system);
if (level > my_geo_system->dim_count)
@@ -206,8 +187,7 @@
return 1;
}
- _ba_node_xlate_to_1d(&offset_1d, coords, my_geo_system);
- bit_set(node_bitmap, offset_1d);
+ ba_node_map_set(node_bitmap, coords, my_geo_system);
return 1;
}
@@ -345,7 +325,7 @@
{
int i;
bitstr_t *alloc_node_bitmap;
- int offset[my_geo_system->dim_count];
+ uint16_t offset[my_geo_system->dim_count];
alloc_node_bitmap = bit_alloc(my_geo_system->total_size);
memset(offset, 0, sizeof(offset));
@@ -359,7 +339,8 @@
}
/* Test if this coordinate is available for use */
if (i >= my_geo_system->dim_count) {
- if (ba_node_map_test(node_bitmap,offset,my_geo_system))
+ if (ba_node_map_test(
+ node_bitmap, offset, my_geo_system))
break; /* not available */
/* Set it in our bitmap for this job */
ba_node_map_set(alloc_node_bitmap, offset,
@@ -486,9 +467,12 @@
}
return;
}
- curr_mp = coord2ba_mp(coords);
- if (!curr_mp)
+
+ slurm_mutex_lock(&ba_system_mutex);
+ if (!(curr_mp = coord2ba_mp(coords))) {
+ slurm_mutex_unlock(&ba_system_mutex);
return;
+ }
if (bitmap)
is_set = bit_test(bitmap, curr_mp->index);
if (!bitmap || (is_set && !except) || (!is_set && except)) {
@@ -496,13 +480,15 @@
if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
info("can't use %s", curr_mp->coord_str);
curr_mp->used |= BA_MP_USED_TEMP;
- bit_set(ba_main_mp_bitmap, curr_mp->index);
+ bit_set(ba_main_mp_bitmap, curr_mp->ba_geo_index);
} else {
curr_mp->used &= (~BA_MP_USED_TEMP);
if (curr_mp->used == BA_MP_USED_FALSE)
- bit_clear(ba_main_mp_bitmap, curr_mp->index);
+ bit_clear(ba_main_mp_bitmap,
+ curr_mp->ba_geo_index);
}
}
+ slurm_mutex_unlock(&ba_system_mutex);
}
static void _internal_reset_ba_system(int level, uint16_t *coords,
@@ -523,14 +509,18 @@
}
return;
}
- curr_mp = coord2ba_mp(coords);
- if (!curr_mp)
+ slurm_mutex_lock(&ba_system_mutex);
+ if (!(curr_mp = coord2ba_mp(coords))) {
+ slurm_mutex_unlock(&ba_system_mutex);
return;
+ }
ba_setup_mp(curr_mp, track_down_mps, false);
- bit_clear(ba_main_mp_bitmap, curr_mp->index);
+ bit_clear(ba_main_mp_bitmap, curr_mp->ba_geo_index);
+ slurm_mutex_unlock(&ba_system_mutex);
}
#if defined HAVE_BG_FILES
+/* ba_system_mutex should be locked before calling. */
static ba_mp_t *_internal_loc2ba_mp(int level, uint16_t *coords,
const char *check)
{
@@ -796,11 +786,12 @@
_build_geo_bitmap_arrays(i);
}
-extern void destroy_ba_mp(void *ptr)
+extern void free_internal_ba_mp(ba_mp_t *ba_mp)
{
- ba_mp_t *ba_mp = (ba_mp_t *)ptr;
if (ba_mp) {
FREE_NULL_BITMAP(ba_mp->cnode_bitmap);
+ FREE_NULL_BITMAP(ba_mp->cnode_err_bitmap);
+ FREE_NULL_BITMAP(ba_mp->cnode_usable_bitmap);
xfree(ba_mp->loc);
if (ba_mp->nodecard_loc) {
int i;
@@ -808,6 +799,15 @@
xfree(ba_mp->nodecard_loc[i]);
xfree(ba_mp->nodecard_loc);
}
+
+ }
+}
+
+extern void destroy_ba_mp(void *ptr)
+{
+ ba_mp_t *ba_mp = (ba_mp_t *)ptr;
+ if (ba_mp) {
+ free_internal_ba_mp(ba_mp);
xfree(ba_mp);
}
}
@@ -817,21 +817,52 @@
int dim;
xassert(ba_mp);
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++) {
+ _pack_ba_switch(&ba_mp->axis_switch[dim], buffer,
+ protocol_version);
+ pack16(ba_mp->coord[dim], buffer);
+ /* No need to pack the coord_str, we can figure that
+ out from the coords packed.
+ */
+ }
+ /* currently there is no need to pack
+ * ba_mp->cnode_bitmap */
- for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++) {
- _pack_ba_switch(&ba_mp->axis_switch[dim], buffer,
- protocol_version);
- pack16(ba_mp->coord[dim], buffer);
- /* No need to pack the coord_str, we can figure that
- out from the coords packed.
+ /* currently there is no need to pack
+ * ba_mp->cnode_err_bitmap */
+
+ pack_bit_fmt(ba_mp->cnode_usable_bitmap, buffer);
+
+ pack16(ba_mp->used, buffer);
+ /* These are only used on the original, not in the
+ block ba_mp's.
+ ba_mp->alter_switch, ba_mp->index, ba_mp->loc,
+ ba_mp->next_mp, ba_mp->nodecard_loc,
+ ba_mp->prev_mp, ba_mp->state
+ */
+ } else {
+ for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++) {
+ _pack_ba_switch(&ba_mp->axis_switch[dim], buffer,
+ protocol_version);
+ pack16(ba_mp->coord[dim], buffer);
+ /* No need to pack the coord_str, we can figure that
+ out from the coords packed.
+ */
+ }
+ pack_bit_fmt(ba_mp->cnode_bitmap, buffer);
+
+ /* currently there is no need to pack
+ * ba_mp->cnode_err_bitmap */
+
+ pack16(ba_mp->used, buffer);
+ /* These are only used on the original, not in the
+ block ba_mp's.
+ ba_mp->alter_switch, ba_mp->index, ba_mp->loc,
+ ba_mp->next_mp, ba_mp->nodecard_loc,
+ ba_mp->prev_mp, ba_mp->state
*/
}
- pack_bit_fmt(ba_mp->cnode_bitmap, buffer);
- pack16(ba_mp->used, buffer);
- /* These are only used on the original, not in the block ba_mp's.
- ba_mp->alter_switch, ba_mp->index, ba_mp->loc, ba_mp->next_mp,
- ba_mp->nodecard_loc, ba_mp->prev_mp, ba_mp->state
- */
}
extern int unpack_ba_mp(ba_mp_t **ba_mp_pptr,
@@ -845,30 +876,68 @@
*ba_mp_pptr = ba_mp;
- for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++) {
- if (_unpack_ba_switch(&ba_mp->axis_switch[dim], buffer,
- protocol_version)
- != SLURM_SUCCESS)
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++) {
+ if (_unpack_ba_switch(&ba_mp->axis_switch[dim], buffer,
+ protocol_version)
+ != SLURM_SUCCESS)
+ goto unpack_error;
+ safe_unpack16(&ba_mp->coord[dim], buffer);
+ ba_mp->coord_str[dim] = alpha_num[ba_mp->coord[dim]];
+ }
+ ba_mp->coord_str[dim] = '\0';
+
+ safe_unpackstr_xmalloc(&bit_char, &uint32_tmp, buffer);
+ if (bit_char) {
+ ba_mp->cnode_usable_bitmap =
+ bit_alloc(bg_conf->mp_cnode_cnt);
+ bit_unfmt(ba_mp->cnode_usable_bitmap, bit_char);
+ xfree(bit_char);
+ ba_mp->cnode_bitmap =
+ bit_copy(ba_mp->cnode_usable_bitmap);
+ }
+ safe_unpack16(&ba_mp->used, buffer);
+
+ /* Since index could of changed here we will go figure
+ * it out again. */
+ slurm_mutex_lock(&ba_system_mutex);
+ if (!(orig_mp = coord2ba_mp(ba_mp->coord))) {
+ slurm_mutex_unlock(&ba_system_mutex);
goto unpack_error;
- safe_unpack16(&ba_mp->coord[dim], buffer);
- ba_mp->coord_str[dim] = alpha_num[ba_mp->coord[dim]];
+ }
+ ba_mp->index = orig_mp->index;
+ ba_mp->ba_geo_index = orig_mp->ba_geo_index;
+ slurm_mutex_unlock(&ba_system_mutex);
+ } else {
+ for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++) {
+ if (_unpack_ba_switch(&ba_mp->axis_switch[dim], buffer,
+ protocol_version)
+ != SLURM_SUCCESS)
+ goto unpack_error;
+ safe_unpack16(&ba_mp->coord[dim], buffer);
+ ba_mp->coord_str[dim] = alpha_num[ba_mp->coord[dim]];
+ }
+ ba_mp->coord_str[dim] = '\0';
+
+ safe_unpackstr_xmalloc(&bit_char, &uint32_tmp, buffer);
+ if (bit_char) {
+ ba_mp->cnode_bitmap = bit_alloc(bg_conf->mp_cnode_cnt);
+ bit_unfmt(ba_mp->cnode_bitmap, bit_char);
+ xfree(bit_char);
+ }
+ safe_unpack16(&ba_mp->used, buffer);
+
+ /* Since index could of changed here we will go figure
+ * it out again. */
+ slurm_mutex_lock(&ba_system_mutex);
+ if (!(orig_mp = coord2ba_mp(ba_mp->coord))) {
+ slurm_mutex_unlock(&ba_system_mutex);
+ goto unpack_error;
+ }
+ ba_mp->index = orig_mp->index;
+ ba_mp->ba_geo_index = orig_mp->ba_geo_index;
+ slurm_mutex_unlock(&ba_system_mutex);
}
- ba_mp->coord_str[dim] = '\0';
-
- safe_unpackstr_xmalloc(&bit_char, &uint32_tmp, buffer);
- if (bit_char) {
- ba_mp->cnode_bitmap = bit_alloc(bg_conf->mp_cnode_cnt);
- bit_unfmt(ba_mp->cnode_bitmap, bit_char);
- xfree(bit_char);
- }
- safe_unpack16(&ba_mp->used, buffer);
-
- /* Since index could of changed here we will go figure it out again. */
- orig_mp = coord2ba_mp(ba_mp->coord);
- if (!orig_mp)
- goto unpack_error;
- ba_mp->index = orig_mp->index;
-
return SLURM_SUCCESS;
unpack_error:
@@ -877,7 +946,9 @@
return SLURM_ERROR;
}
-
+/* If used in the bluegene plugin this ba_system_mutex must be
+ * locked. Don't work about it in programs like smap.
+ */
extern ba_mp_t *str2ba_mp(const char *coords)
{
uint16_t coord[cluster_dims];
@@ -905,11 +976,16 @@
return NULL;
}
+ if (bridge_setup_system() != SLURM_SUCCESS)
+ return NULL;
+
return coord2ba_mp(coord);
}
/*
* find a base blocks bg location (rack/midplane)
+ * If used in the bluegene plugin this ba_system_mutex must be
+ * locked. Don't work about it in programs like smap.
*/
extern ba_mp_t *loc2ba_mp(const char* mp_id)
{
@@ -970,11 +1046,12 @@
int i;
uint16_t node_base_state = ba_mp->state & NODE_STATE_BASE;
- if (!track_down_mps ||((node_base_state != NODE_STATE_DOWN)
- && !(ba_mp->state & NODE_STATE_DRAIN)))
+ if (!track_down_mps || ((node_base_state != NODE_STATE_DOWN)
+ && !(ba_mp->state & NODE_STATE_DRAIN)))
ba_mp->used = BA_MP_USED_FALSE;
- for (i=0; i<cluster_dims; i++){
+ for (i=0; i<cluster_dims; i++) {
+ bool set_error = 0;
#ifdef HAVE_BG_L_P
int j;
for (j=0;j<NUM_PORTS_PER_NODE;j++) {
@@ -987,10 +1064,22 @@
ba_mp->axis_switch[i].int_wire[j].port_tar = j;
}
#endif
+ if (ba_mp->axis_switch[i].usage & BG_SWITCH_CABLE_ERROR)
+ set_error = 1;
+
if (wrap_it)
ba_mp->axis_switch[i].usage = BG_SWITCH_WRAPPED;
else
ba_mp->axis_switch[i].usage = BG_SWITCH_NONE;
+
+ if (set_error) {
+ if (track_down_mps)
+ ba_mp->axis_switch[i].usage
+ |= BG_SWITCH_CABLE_ERROR_FULL;
+ else
+ ba_mp->axis_switch[i].usage
+ |= BG_SWITCH_CABLE_ERROR;
+ }
ba_mp->alter_switch[i].usage = BG_SWITCH_NONE;
}
}
@@ -1006,6 +1095,7 @@
ba_mp_t *new_ba_mp = (ba_mp_t *)xmalloc(sizeof(ba_mp_t));
memcpy(new_ba_mp, ba_mp, sizeof(ba_mp_t));
+
/* we have to set this or we would be pointing to the original */
memset(new_ba_mp->next_mp, 0, sizeof(new_ba_mp->next_mp));
/* we have to set this or we would be pointing to the original */
@@ -1014,6 +1104,8 @@
new_ba_mp->nodecard_loc = NULL;
new_ba_mp->loc = NULL;
new_ba_mp->cnode_bitmap = NULL;
+ new_ba_mp->cnode_err_bitmap = NULL;
+ new_ba_mp->cnode_usable_bitmap = NULL;
return new_ba_mp;
}
@@ -1063,7 +1155,8 @@
}
}
-extern void ba_create_geo_table(ba_geo_system_t *my_geo_system)
+extern void ba_create_geo_table(ba_geo_system_t *my_geo_system,
+ bool avoid_three)
{
ba_geo_table_t *geo_ptr;
int dim, inx[my_geo_system->dim_count], passthru, product;
@@ -1086,18 +1179,23 @@
(my_geo_system->total_size+1));
do {
+ bool found_three = 0;
/* Store new value */
geo_ptr = xmalloc(sizeof(ba_geo_table_t));
geo_ptr->geometry = xmalloc(sizeof(uint16_t) *
my_geo_system->dim_count);
product = 1;
for (dim = 0; dim < my_geo_system->dim_count; dim++) {
+ if (avoid_three && (inx[dim] == 3)) {
+ found_three = 1;
+ goto next_geo;
+ }
geo_ptr->geometry[dim] = inx[dim];
product *= inx[dim];
- passthru = inx[dim] - my_geo_system->dim_size[dim];
+ passthru = my_geo_system->dim_size[dim] - inx[dim];
if (passthru == 0)
geo_ptr->full_dim_cnt++;
- else if (passthru > 1)
+ else if ((passthru > 1) && (inx[dim] > 1))
geo_ptr->passthru_cnt += passthru;
}
geo_ptr->size = product;
@@ -1118,6 +1216,11 @@
}
geo_ptr->next_ptr = *last_pptr;
*last_pptr = geo_ptr;
+ next_geo:
+ if (found_three) {
+ xfree(geo_ptr->geometry);
+ xfree(geo_ptr);
+ }
} while (_incr_geo(inx, my_geo_system)); /* Generate next geometry */
}
@@ -1174,13 +1277,10 @@
* IN full_offset - N-dimension zero-origin offset to set
* IN my_geo_system - system geometry specification
*/
-extern void ba_node_map_set(bitstr_t *node_bitmap, int *full_offset,
+extern void ba_node_map_set(bitstr_t *node_bitmap, uint16_t *full_offset,
ba_geo_system_t *my_geo_system)
{
- int offset_1d;
-
- _ba_node_xlate_to_1d(&offset_1d, full_offset, my_geo_system);
- bit_set(node_bitmap, offset_1d);
+ bit_set(node_bitmap, ba_node_xlate_to_1d(full_offset, my_geo_system));
}
/*
@@ -1194,7 +1294,7 @@
int *start_offset, int *end_offset,
ba_geo_system_t *my_geo_system)
{
- int coords[5];
+ uint16_t coords[HIGHEST_DIMENSIONS];
_ba_node_map_set_range_internal(0, coords, start_offset, end_offset,
node_bitmap, my_geo_system);
@@ -1206,13 +1306,11 @@
* IN full_offset - N-dimension zero-origin offset to test
* IN my_geo_system - system geometry specification
*/
-extern int ba_node_map_test(bitstr_t *node_bitmap, int *full_offset,
+extern int ba_node_map_test(bitstr_t *node_bitmap, uint16_t *full_offset,
ba_geo_system_t *my_geo_system)
{
- int offset_1d;
-
- _ba_node_xlate_to_1d(&offset_1d, full_offset, my_geo_system);
- return bit_test(node_bitmap, offset_1d);
+ return bit_test(node_bitmap,
+ ba_node_xlate_to_1d(full_offset, my_geo_system));
}
/*
@@ -1398,6 +1496,24 @@
return rc;
}
+/* Translate a multi-dimension coordinate (3-D, 4-D, 5-D, etc.) into a 1-D
+ * offset in the cnode* bitmap */
+extern int ba_node_xlate_to_1d(uint16_t *full_offset,
+ ba_geo_system_t *my_geo_system)
+{
+ int i, map_offset;
+
+ xassert(full_offset);
+ xassert(my_geo_system);
+ i = my_geo_system->dim_count - 1;
+ map_offset = full_offset[i];
+ for (i-- ; i >= 0; i--) {
+ map_offset *= my_geo_system->dim_size[i];
+ map_offset += full_offset[i];
+ }
+ return map_offset;
+}
+
/*
* Used to set all midplanes in a special used state except the ones
* we are able to use in a new allocation.
@@ -1496,27 +1612,51 @@
extern char *ba_switch_usage_str(uint16_t usage)
{
- switch (usage) {
+ bool error_set = (usage & BG_SWITCH_CABLE_ERROR);
+ uint16_t local_usage = usage;
+
+ if (error_set)
+ local_usage &= (~BG_SWITCH_CABLE_ERROR_FULL);
+
+ switch (local_usage) {
case BG_SWITCH_NONE:
+ if (error_set)
+ return "ErrorOut";
return "None";
case BG_SWITCH_WRAPPED_PASS:
+ if (error_set)
+ return "WrappedPass,ErrorOut";
return "WrappedPass";
case BG_SWITCH_TORUS:
+ if (error_set)
+ return "FullTorus,ErrorOut";
return "FullTorus";
case BG_SWITCH_PASS:
+ if (error_set)
+ return "Passthrough,ErrorOut";
return "Passthrough";
case BG_SWITCH_WRAPPED:
+ if (error_set)
+ return "Wrapped,ErrorOut";
return "Wrapped";
case (BG_SWITCH_OUT | BG_SWITCH_OUT_PASS):
+ if (error_set)
+ return "OutLeaving,ErrorOut";
return "OutLeaving";
case BG_SWITCH_OUT:
+ if (error_set)
+ return "ErrorOut";
return "Out";
case (BG_SWITCH_IN | BG_SWITCH_IN_PASS):
+ if (error_set)
+ return "InComming,ErrorOut";
return "InComming";
case BG_SWITCH_IN:
+ if (error_set)
+ return "In,ErrorOut";
return "In";
default:
- error("unknown switch usage %u", usage);
+ error("unknown switch usage %u %u", usage, local_usage);
xassert(0);
break;
}
diff --git a/src/plugins/select/bluegene/ba_common.h b/src/plugins/select/bluegene/ba_common.h
index 8b9344f..b9300d9 100644
--- a/src/plugins/select/bluegene/ba_common.h
+++ b/src/plugins/select/bluegene/ba_common.h
@@ -98,7 +98,7 @@
typedef struct {
uint16_t dim_count; /* Number of system dimensions */
int *dim_size; /* System size in each dimension */
- uint16_t total_size; /* Total number of nodes in system */
+ uint32_t total_size; /* Total number of nodes in system */
ba_geo_table_t **geo_table_ptr; /* Pointers to possible geometries.
* Index is request size */
@@ -148,13 +148,21 @@
ba_switch_t alter_switch[HIGHEST_DIMENSIONS];
/* a switch for each dimensions */
ba_switch_t axis_switch[HIGHEST_DIMENSIONS];
+ /* index into the ba_main_grid_array (BGQ) used for easy look
+ * up of the miplane in that system */
+ uint32_t ba_geo_index;
/* Bitmap of available cnodes */
bitstr_t *cnode_bitmap;
+ /* Bitmap of available cnodes in error (usually software) */
+ bitstr_t *cnode_err_bitmap;
+ /* Bitmap of available cnodes in the containing block */
+ bitstr_t *cnode_usable_bitmap;
/* coordinates of midplane */
uint16_t coord[HIGHEST_DIMENSIONS];
/* coordinates of midplane in str format */
char coord_str[HIGHEST_DIMENSIONS+1];
- /* midplane index used for easy look up of the miplane */
+ /* index into the node_record_table_ptr used for easy look up
+ * of the miplane in that system */
uint32_t index;
/* rack-midplane location. */
char *loc;
@@ -187,6 +195,7 @@
extern bool ba_initialized;
extern uint32_t ba_debug_flags;
extern bitstr_t *ba_main_mp_bitmap;
+extern pthread_mutex_t ba_system_mutex;
/*
* Initialize internal structures by either reading previous block
@@ -208,6 +217,7 @@
/* setup the wires for the system */
extern void ba_setup_wires(void);
+extern void free_internal_ba_mp(ba_mp_t *ba_mp);
extern void destroy_ba_mp(void *ptr);
extern void pack_ba_mp(ba_mp_t *ba_mp, Buf buffer, uint16_t protocol_version);
extern int unpack_ba_mp(ba_mp_t **ba_mp_pptr, Buf buffer,
@@ -254,9 +264,13 @@
* Set dim_count and dim_size. Other fields should be NULL.
* This function will set total_size, geo_table_ptr, and
* geo_table_size.
+ * IN avoid_three - used to get around a limitation in the IBM IO
+ * system where a sub-block allocation can't reliably
+ * have a dimension of 3 in in.
* Release memory using ba_free_geo_table().
*/
-extern void ba_create_geo_table(ba_geo_system_t *my_geo_system);
+extern void ba_create_geo_table(ba_geo_system_t *my_geo_system,
+ bool avoid_three);
/*
* Free memory allocated by ba_create_geo_table().
@@ -284,7 +298,7 @@
* IN full_offset - N-dimension zero-origin offset to set
* IN my_geo_system - system geometry specification
*/
-extern void ba_node_map_set(bitstr_t *node_bitmap, int *full_offset,
+extern void ba_node_map_set(bitstr_t *node_bitmap, uint16_t *full_offset,
ba_geo_system_t *my_geo_system);
/*
@@ -304,7 +318,7 @@
* IN full_offset - N-dimension zero-origin offset to test
* IN my_geo_system - system geometry specification
*/
-extern int ba_node_map_test(bitstr_t *node_bitmap, int *full_offset,
+extern int ba_node_map_test(bitstr_t *node_bitmap, uint16_t *full_offset,
ba_geo_system_t *my_geo_system);
/*
@@ -386,6 +400,16 @@
uint16_t *start_pos, int *scan_offset,
bool deny_wrap);
+/* Translate a multi-dimension coordinate (3-D, 4-D, 5-D, etc.) into a 1-D
+ * offset in the ba_geo_system_t bitmap
+ *
+ * IN full_offset - N-dimension zero-origin offset to test
+ * IN my_geo_system - system geometry specification
+ * RET - 1-D offset
+ */
+extern int ba_node_xlate_to_1d(uint16_t *full_offset,
+ ba_geo_system_t *my_geo_system);
+
/*
* Used to set all midplanes in a special used state except the ones
* we are able to use in a new allocation.
@@ -520,14 +544,17 @@
* IN/OUT results - a list with a NULL destroyer filled in with
* midplanes and wires set to create the block with the api. If
* only interested in the hostlist NULL can be excepted also.
- * IN start - where to start the allocation.
- * IN geometry - the requested geometry of the block.
- * IN conn_type - mesh, torus, or small.
+ * IN ba_request - request for the block
+ *
+ * To be set in the ba_request
+ * start - where to start the allocation. (optional)
+ * geometry or size - the requested geometry of the block. (required)
+ * conn_type - mesh, torus, or small. (required)
+ *
* RET char * - hostlist of midplanes results represent must be
* xfreed. NULL on failure
*/
-extern char *set_bg_block(List results, uint16_t *start,
- uint16_t *geometry, uint16_t *conn_type);
+extern char *set_bg_block(List results, select_ba_request_t* ba_request);
/*
* Set up the map for resolving
@@ -541,18 +568,27 @@
extern void ba_rotate_geo(uint16_t *req_geo, int rot_cnt);
-extern ba_mp_t *ba_pick_sub_block_cnodes(
+extern bool ba_sub_block_in_bitmap(select_jobinfo_t *jobinfo,
+ bitstr_t *usable_bitmap, bool step);
+
+extern int ba_sub_block_in_bitmap_clear(select_jobinfo_t *jobinfo,
+ bitstr_t *usable_bitmap);
+
+extern ba_mp_t *ba_sub_block_in_record(
bg_record_t *bg_record, uint32_t *node_count,
select_jobinfo_t *jobinfo);
-extern int ba_clear_sub_block_cnodes(
+extern int ba_sub_block_in_record_clear(
bg_record_t *bg_record, struct step_record *step_ptr);
+extern void ba_sync_job_to_block(bg_record_t *bg_record,
+ struct job_record *job_ptr);
+
extern bitstr_t *ba_create_ba_mp_cnode_bitmap(bg_record_t *bg_record);
/* set the ionode str based off the block allocator, either ionodes
* or cnode coords */
-extern char *ba_set_ionode_str(bitstr_t *bitmap);
+extern void ba_set_ionode_str(bg_record_t *bg_record);
/* Convert PASS_FOUND_* into equivalent string
* Caller MUST xfree() the returned value */
@@ -560,4 +596,6 @@
extern char *give_geo(uint16_t *int_geo, int dims, bool with_sep);
+extern struct job_record *ba_remove_job_in_block_job_list(
+ bg_record_t *bg_record, struct job_record *job_ptr);
#endif
diff --git a/src/plugins/select/bluegene/bg_core.c b/src/plugins/select/bluegene/bg_core.c
index 2f82707..880bdf2 100644
--- a/src/plugins/select/bluegene/bg_core.c
+++ b/src/plugins/select/bluegene/bg_core.c
@@ -43,9 +43,7 @@
#include "bg_defined_block.h"
#include "src/slurmctld/locks.h"
#include <fcntl.h>
-#ifdef HAVE_BG_L_P
-#include "bl/bridge_status.h"
-#endif
+
#define MAX_FREE_RETRIES 200 /* max number of
* FREE_SLEEP_INTERVALS to wait
* before putting a
@@ -81,15 +79,14 @@
}
bg_record->free_cnt--;
-
if (bg_record->free_cnt == -1) {
info("we got a negative 1 here for %s",
bg_record->bg_block_id);
xassert(0);
return SLURM_SUCCESS;
} else if (bg_record->modifying) {
- info("%d others are modifing this block %s",
- bg_record->free_cnt, bg_record->bg_block_id);
+ info("others are modifing this block %s, don't clear it up",
+ bg_record->bg_block_id);
return SLURM_SUCCESS;
} else if (bg_record->free_cnt) {
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
@@ -98,8 +95,11 @@
return SLURM_SUCCESS;
}
- if (!(bg_record->state & BG_BLOCK_ERROR_FLAG)
- && (bg_record->state != BG_BLOCK_FREE)) {
+ /* Even if the block is already in error state we need to do this to
+ avoid any overlapping blocks that may have been created due
+ to bad hardware.
+ */
+ if ((bg_record->state & (~BG_BLOCK_ERROR_FLAG)) != BG_BLOCK_FREE) {
/* Something isn't right, go mark this one in an error
state. */
update_block_msg_t block_msg;
@@ -115,18 +115,30 @@
slurm_mutex_unlock(&block_state_mutex);
select_g_update_block(&block_msg);
slurm_mutex_lock(&block_state_mutex);
+ if (block_ptr_exist_in_list(bg_lists->main, bg_record))
+ bg_record->destroy = 0;
return SLURM_SUCCESS;
}
+ /* If we are here we are done with the destroy so just reset it. */
+ bg_record->destroy = 0;
+
/* A bit of a sanity check to make sure blocks are being
removed out of all the lists.
*/
remove_from_bg_list(bg_lists->booted, bg_record);
if (remove_from_bg_list(bg_lists->job_running, bg_record)
- == SLURM_SUCCESS)
+ == SLURM_SUCCESS) {
+ debug2("_post_block_free: we are freeing block %s and "
+ "it was in the job_running list. This can happen if a "
+ "block is removed while waiting for mmcs to finish "
+ "removing the job from the block.",
+ bg_record->bg_block_id);
num_unused_cpus += bg_record->cpu_cnt;
+ }
- if (restore)
+ /* If we don't have any mp_counts force block removal */
+ if (restore && bg_record->mp_count)
return SLURM_SUCCESS;
if (remove_from_bg_list(bg_lists->main, bg_record) != SLURM_SUCCESS) {
@@ -181,10 +193,10 @@
while (retry_cnt < MAX_FREE_RETRIES) {
free_cnt = 0;
slurm_mutex_lock(&block_state_mutex);
-#ifdef HAVE_BG_L_P
+
/* just to make sure state is updated */
bridge_status_update_block_list_state(track_list);
-#endif
+
list_iterator_reset(itr);
/* just incase this changes from the update function */
track_cnt = list_count(track_list);
@@ -224,7 +236,7 @@
}
debug("_track_freeing_blocks: Freed them all for job %u", job_id);
- if ((bg_conf->layout_mode == LAYOUT_DYNAMIC) || destroy)
+ if (destroy)
restore = false;
/* If there is a block in error state we need to keep all
@@ -287,8 +299,30 @@
return true;
}
+extern bool block_mp_passthrough(bg_record_t *bg_record, int mp_bit)
+{
+ bool has_pass = 0;
+ ba_mp_t *ba_mp = NULL;
+ ListIterator itr;
+
+ /* no passthrough */
+ if (bg_record->mp_count == list_count(bg_record->ba_mp_list))
+ return 0;
+
+ itr = list_iterator_create(bg_record->ba_mp_list);
+ while ((ba_mp = list_next(itr))) {
+ if (ba_mp->index == mp_bit && ba_mp->used == BA_MP_USED_FALSE) {
+ has_pass = 1;
+ break;
+ }
+ }
+ list_iterator_destroy(itr);
+ return has_pass;
+}
+
/* block_state_mutex must be unlocked before calling this. */
-extern void bg_requeue_job(uint32_t job_id, bool wait_for_start)
+extern void bg_requeue_job(uint32_t job_id, bool wait_for_start,
+ bool slurmctld_locked)
{
int rc;
slurmctld_lock_t job_write_lock = {
@@ -300,13 +334,15 @@
if (wait_for_start)
sleep(2);
- lock_slurmctld(job_write_lock);
+ if (!slurmctld_locked)
+ lock_slurmctld(job_write_lock);
if ((rc = job_requeue(0, job_id, -1, (uint16_t)NO_VAL, false))) {
error("Couldn't requeue job %u, failing it: %s",
job_id, slurm_strerror(rc));
job_fail(job_id);
}
- unlock_slurmctld(job_write_lock);
+ if (!slurmctld_locked)
+ unlock_slurmctld(job_write_lock);
}
/* if SLURM_ERROR you will need to fail the job with
@@ -368,6 +404,15 @@
bg_record->bg_block_id);
bg_record->state = BG_BLOCK_FREE;
break;
+ } else if (rc == BG_ERROR_FREE) {
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_SELECT_TYPE)
+ info("bridge_block_free"
+ "(%s): %s State = %s",
+ bg_record->bg_block_id,
+ bg_err_str(rc),
+ bg_block_state_string(
+ bg_record->state));
} else if (rc == BG_ERROR_INVALID_STATE) {
#ifndef HAVE_BGL
/* If the state is error and
@@ -383,10 +428,11 @@
if (bg_conf->slurm_debug_flags
& DEBUG_FLAG_SELECT_TYPE)
info("bridge_block_free"
- "(%s): %s State = %d",
+ "(%s): %s State = %s",
bg_record->bg_block_id,
bg_err_str(rc),
- bg_record->state);
+ bg_block_state_string(
+ bg_record->state));
#ifdef HAVE_BGQ
if (bg_record->state != BG_BLOCK_FREE
&& bg_record->state
@@ -395,10 +441,11 @@
#endif
} else {
error("bridge_block_free"
- "(%s): %s State = %d",
+ "(%s): %s State = %s",
bg_record->bg_block_id,
bg_err_str(rc),
- bg_record->state);
+ bg_block_state_string(
+ bg_record->state));
}
}
}
@@ -472,6 +519,8 @@
bg_free_block_list_t *bg_free_list;
pthread_attr_t attr_agent;
pthread_t thread_agent;
+ List kill_job_list = NULL;
+ kill_job_struct_t *freeit;
if (!track_list || !list_count(track_list))
return SLURM_SUCCESS;
@@ -491,26 +540,59 @@
}
bg_record->free_cnt++;
+ /* just so we don't over write a different thread that
+ wants this block destroyed */
+ if (destroy && !bg_record->destroy)
+ bg_record->destroy = destroy;
+
+ if (destroy && (bg_record->state & BG_BLOCK_ERROR_FLAG))
+ resume_block(bg_record);
+
if (bg_record->job_ptr
&& !IS_JOB_FINISHED(bg_record->job_ptr)) {
- info("We are freeing a block (%s) that has job %u(%u).",
+ info("We are freeing a block (%s) that "
+ "has job %u(%u).",
bg_record->bg_block_id,
bg_record->job_ptr->job_id,
bg_record->job_running);
- /* This is not thread safe if called from
- bg_job_place.c anywhere from within
- submit_job() or at startup. */
- slurm_mutex_unlock(&block_state_mutex);
- bg_requeue_job(bg_record->job_ptr->job_id, 0);
- slurm_mutex_lock(&block_state_mutex);
+ if (!kill_job_list)
+ kill_job_list =
+ bg_status_create_kill_job_list();
+ freeit = xmalloc(sizeof(kill_job_struct_t));
+ freeit->jobid = bg_record->job_ptr->job_id;
+ list_push(kill_job_list, freeit);
+ } else if (bg_record->job_list
+ && list_count(bg_record->job_list)) {
+ struct job_record *job_ptr;
+ ListIterator itr;
+
+ if (!kill_job_list)
+ kill_job_list =
+ bg_status_create_kill_job_list();
+ info("We are freeing a block (%s) that has at "
+ "least 1 job.",
+ bg_record->bg_block_id);
+ itr = list_iterator_create(bg_record->job_list);
+ while ((job_ptr = list_next(itr))) {
+ if ((job_ptr->magic != JOB_MAGIC)
+ || IS_JOB_FINISHED(job_ptr))
+ continue;
+ freeit = xmalloc(sizeof(kill_job_struct_t));
+ freeit->jobid = job_ptr->job_id;
+ list_push(kill_job_list, freeit);
+ }
+ list_iterator_destroy(itr);
}
- if (remove_from_bg_list(bg_lists->job_running, bg_record)
- == SLURM_SUCCESS)
- num_unused_cpus += bg_record->cpu_cnt;
}
list_iterator_destroy(itr);
slurm_mutex_unlock(&block_state_mutex);
+ if (kill_job_list) {
+ bg_status_process_kill_job_list(kill_job_list, 0);
+ list_destroy(kill_job_list);
+ kill_job_list = NULL;
+ }
+
if (wait) {
/* Track_freeing_blocks waits until the list is done
and frees the memory of bg_free_list.
@@ -569,6 +651,8 @@
return "Slurm Success";
case SLURM_ERROR:
return "Slurm Error";
+ case BG_ERROR_INVALID_STATE:
+ return "Invalid State";
case BG_ERROR_BLOCK_NOT_FOUND:
return "Block not found";
case BG_ERROR_BOOT_ERROR:
@@ -593,12 +677,14 @@
return "Inconsistent data";
case BG_ERROR_NO_IOBLOCK_CONNECTED:
return "No IO Block Connected";
+ case BG_ERROR_FREE:
+ return "BlockFreeError (Most likely the block has pending action, should clear up shortly, check bridgeapi.log for further info)";
}
/* I know this isn't the best way to handle this, but it only
happens very rarely and usually in debugging, so it
hopefully isn't really all that bad.
*/
- snprintf(tmp_char, sizeof(tmp_char), "%u ?", inx);
+ snprintf(tmp_char, sizeof(tmp_char), "unknown %u?", inx);
return tmp_char;
}
diff --git a/src/plugins/select/bluegene/bg_core.h b/src/plugins/select/bluegene/bg_core.h
index 7277a78..e61a715 100644
--- a/src/plugins/select/bluegene/bg_core.h
+++ b/src/plugins/select/bluegene/bg_core.h
@@ -68,7 +68,9 @@
/* extern int num_unused_cpus; */
extern bool blocks_overlap(bg_record_t *rec_a, bg_record_t *rec_b);
-extern void bg_requeue_job(uint32_t job_id, bool wait_for_start);
+extern bool block_mp_passthrough(bg_record_t *bg_record, int mp_bit);
+extern void bg_requeue_job(uint32_t job_id, bool wait_for_start,
+ bool slurmctld_locked);
/* sort a list of bg_records by size (node count) */
extern void sort_bg_record_inc_size(List records);
diff --git a/src/plugins/select/bluegene/bg_defined_block.c b/src/plugins/select/bluegene/bg_defined_block.c
index 97807b0..d659ec1 100644
--- a/src/plugins/select/bluegene/bg_defined_block.c
+++ b/src/plugins/select/bluegene/bg_defined_block.c
@@ -55,7 +55,6 @@
ListIterator itr;
bg_record_t *bg_record = NULL;
int i;
- uint16_t geo[SYSTEM_DIMENSIONS];
char temp[256];
struct part_record *part_ptr = NULL;
bitstr_t *usable_mp_bitmap = bit_alloc(node_record_count);
@@ -117,10 +116,10 @@
bg_record->bg_block_id);
for (i=0; i<SYSTEM_DIMENSIONS; i++) {
- geo[i] = bg_record->geo[i];
start_char[i] = alpha_num[
bg_record->start[i]];
- geo_char[i] = alpha_num[geo[i]];
+ geo_char[i] = alpha_num[
+ bg_record->geo[i]];
}
start_char[i] = '\0';
geo_char[i] = '\0';
@@ -149,11 +148,17 @@
#else
List results = list_create(NULL);
#endif
- name = set_bg_block(
- results,
- bg_record->start,
- geo,
- bg_record->conn_type);
+ select_ba_request_t ba_request;
+ memset(&ba_request, 0,
+ sizeof(ba_request));
+ memcpy(ba_request.geometry,
+ bg_record->geo,
+ sizeof(bg_record->geo));
+ memcpy(ba_request.conn_type,
+ bg_record->conn_type,
+ sizeof(bg_record->conn_type));
+ name = set_bg_block(results,
+ &ba_request);
ba_reset_all_removed_mps();
if (!name) {
error("I was unable to "
@@ -183,6 +188,9 @@
#ifdef HAVE_BGQ
bg_record->ba_mp_list = results;
results = NULL;
+ memcpy(bg_record->start,
+ ba_request.start,
+ sizeof(bg_record->start));
#else
bg_record->ba_mp_list =
list_create(destroy_ba_mp);
@@ -276,6 +284,7 @@
bool larger = 0;
char start_char[SYSTEM_DIMENSIONS+1];
char geo_char[SYSTEM_DIMENSIONS+1];
+ select_ba_request_t ba_request;
if (!dims) {
dims = select_g_ba_get_dims();
@@ -400,10 +409,14 @@
#else
results = list_create(NULL);
#endif
- name = set_bg_block(results,
- bg_record->start,
- bg_record->geo,
- bg_record->conn_type);
+ memset(&ba_request, 0, sizeof(ba_request));
+ memcpy(ba_request.start, bg_record->start, sizeof(bg_record->start));
+ memcpy(ba_request.geometry, bg_record->geo, sizeof(bg_record->geo));
+ memcpy(ba_request.conn_type, bg_record->conn_type,
+ sizeof(bg_record->conn_type));
+ ba_request.start_req = 1;
+ name = set_bg_block(results, &ba_request);
+
if (!name) {
error("I was unable to make the full system block.");
list_destroy(results);
diff --git a/src/plugins/select/bluegene/bg_dynamic_block.c b/src/plugins/select/bluegene/bg_dynamic_block.c
index 95095d1..31e9efb 100644
--- a/src/plugins/select/bluegene/bg_dynamic_block.c
+++ b/src/plugins/select/bluegene/bg_dynamic_block.c
@@ -44,7 +44,7 @@
static int _breakup_blocks(List block_list, List new_blocks,
select_ba_request_t *request, List my_block_list,
- bool only_free, bool only_small);
+ int cnodes, bool only_free, bool only_small);
/*
* create_dynamic_block - create new block(s) to be used for a new
@@ -65,13 +65,20 @@
bitstr_t *my_bitmap = NULL;
select_ba_request_t blockreq;
int cnodes = request->procs / bg_conf->cpu_ratio;
+ int orig_cnodes;
uint16_t start_geo[SYSTEM_DIMENSIONS];
- if (cnodes < bg_conf->smallest_block) {
+ if (cnodes < bg_conf->smallest_block)
+ cnodes = bg_conf->smallest_block;
+ orig_cnodes = cnodes;
+
+ if (bg_conf->sub_blocks && (cnodes < bg_conf->mp_cnode_cnt)) {
+ cnodes = bg_conf->mp_cnode_cnt;
+ request->conn_type[0] = SELECT_TORUS;
+ } else if (cnodes < bg_conf->smallest_block) {
error("Can't create this size %d "
- "on this system ionodes_per_mp is %d",
- request->procs,
- bg_conf->ionodes_per_mp);
+ "on this system the smallest block is %u",
+ cnodes, bg_conf->smallest_block);
goto finished;
}
memset(&blockreq, 0, sizeof(select_ba_request_t));
@@ -204,6 +211,7 @@
if (request->avail_mp_bitmap)
ba_set_removable_mps(request->avail_mp_bitmap, 1);
+try_small_again:
if (request->size==1 && cnodes < bg_conf->mp_cnode_cnt) {
switch(cnodes) {
#ifdef HAVE_BGL
@@ -255,21 +263,21 @@
/* check only blocks that are free and small */
if (_breakup_blocks(block_list, new_blocks,
request, my_block_list,
- true, true)
+ cnodes, true, true)
== SLURM_SUCCESS)
goto finished;
/* check only blocks that are free and any size */
if (_breakup_blocks(block_list, new_blocks,
request, my_block_list,
- true, false)
+ cnodes, true, false)
== SLURM_SUCCESS)
goto finished;
/* check usable blocks that are small with any state */
if (_breakup_blocks(block_list, new_blocks,
request, my_block_list,
- false, true)
+ cnodes, false, true)
== SLURM_SUCCESS)
goto finished;
@@ -281,7 +289,7 @@
*/
/* if (_breakup_blocks(block_list, new_blocks, */
/* request, my_block_list, */
- /* false, false) */
+ /* cnodes, false, false) */
/* == SLURM_SUCCESS) */
/* goto finished; */
@@ -293,9 +301,6 @@
info("small block not able to be placed inside others");
}
- if (request->conn_type[0] == SELECT_NAV)
- request->conn_type[0] = SELECT_TORUS;
-
//debug("going to create %d", request->size);
if (!new_ba_request(request)) {
if (request->geometry[0] != (uint16_t)NO_VAL) {
@@ -335,8 +340,7 @@
}
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
- info("allocate failure for size %d base "
- "partitions of free midplanes",
+ info("allocate failure for %d midplanes with free midplanes",
request->size);
rc = SLURM_ERROR;
@@ -350,7 +354,9 @@
bool is_small = 0;
/* never check a block with a job running */
if (bg_record->free_cnt
- || bg_record->job_running != NO_JOB_RUNNING)
+ || ((bg_record->job_running != NO_JOB_RUNNING)
+ || (bg_record->job_list
+ && list_count(bg_record->job_list))))
continue;
/* Here we are only looking for the first
@@ -372,8 +378,10 @@
midplane it will automatically be
-1. So just look for running jobs.
*/
- if ((found_record->job_running
- != NO_JOB_RUNNING)
+ if (((found_record->job_running
+ != NO_JOB_RUNNING)
+ || (found_record->job_list
+ && list_count(found_record->job_list)))
&& bit_overlap(bg_record->mp_bitmap,
found_record->mp_bitmap)) {
found = 1;
@@ -431,6 +439,7 @@
blockreq.linuximage = request->linuximage;
blockreq.mloaderimage = request->mloaderimage;
blockreq.ramdiskimage = request->ramdiskimage;
+ memcpy(blockreq.start, request->start, sizeof(blockreq.start));
memcpy(blockreq.conn_type, request->conn_type,
sizeof(blockreq.conn_type));
@@ -438,6 +447,11 @@
}
finished:
+ if (!new_blocks && orig_cnodes != cnodes) {
+ cnodes = orig_cnodes;
+ goto try_small_again;
+ }
+
if (request->avail_mp_bitmap
&& (bit_ffc(request->avail_mp_bitmap) == -1))
ba_reset_all_removed_mps();
@@ -451,12 +465,15 @@
}
list_iterator_destroy(itr);
-
xfree(request->save_name);
- if (results)
+ if (results) {
list_destroy(results);
+ results = NULL;
+ }
+
errno = rc;
+
return new_blocks;
}
@@ -470,50 +487,11 @@
found_record = (bg_record_t*) xmalloc(sizeof(bg_record_t));
found_record->magic = BLOCK_MAGIC;
+ /* This will be a list containing jobs running on this
+ block */
+ if (bg_conf->sub_blocks)
+ found_record->job_list = list_create(NULL);
found_record->job_running = NO_JOB_RUNNING;
- found_record->user_name = xstrdup(bg_record->user_name);
- found_record->user_uid = bg_record->user_uid;
- found_record->ba_mp_list = list_create(destroy_ba_mp);
- if (bg_record->ba_mp_list)
- ba_mp = list_peek(bg_record->ba_mp_list);
- if (!ba_mp) {
- if (bg_record->mp_str) {
- hostlist_t hl = hostlist_create(bg_record->mp_str);
- char *host = hostlist_shift(hl);
- hostlist_destroy(hl);
- found_record->mp_str = xstrdup(host);
- free(host);
- error("you gave me a list with no ba_mps using %s",
- found_record->mp_str);
- } else {
- char tmp_char[SYSTEM_DIMENSIONS+1];
- int dim;
- for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
- tmp_char[dim] =
- alpha_num[found_record->start[dim]];
- tmp_char[dim] = '\0';
- found_record->mp_str = xstrdup_printf(
- "%s%s",
- bg_conf->slurm_node_prefix,
- tmp_char);
- error("you gave me a record with no ba_mps "
- "and no nodes either using %s",
- found_record->mp_str);
- }
- } else {
- new_ba_mp = ba_copy_mp(ba_mp);
- /* We need to have this node wrapped in Q to handle
- wires correctly when creating around the midplane.
- */
- ba_setup_mp(new_ba_mp, false, true);
-
- new_ba_mp->used = BA_MP_USED_TRUE;
- list_append(found_record->ba_mp_list, new_ba_mp);
- found_record->mp_count = 1;
- found_record->mp_str = xstrdup_printf(
- "%s%s",
- bg_conf->slurm_node_prefix, new_ba_mp->coord_str);
- }
#ifdef HAVE_BGL
found_record->node_use = SELECT_COPROCESSOR_MODE;
@@ -525,8 +503,6 @@
#endif
found_record->mloaderimage = xstrdup(bg_record->mloaderimage);
- process_nodes(found_record, false);
-
if (bg_record->conn_type[0] >= SELECT_SMALL)
found_record->conn_type[0] = bg_record->conn_type[0];
else
@@ -537,13 +513,80 @@
found_record->cnode_cnt = size;
found_record->ionode_bitmap = bit_copy(ionodes);
- found_record->ionode_str =
- ba_set_ionode_str(found_record->ionode_bitmap);
- found_record->mp_used_bitmap = bit_alloc(node_record_count);
+ ba_set_ionode_str(found_record);
+
+ found_record->ba_mp_list = list_create(destroy_ba_mp);
+
+ slurm_mutex_lock(&ba_system_mutex);
+ if (bg_record->ba_mp_list)
+ ba_mp = list_peek(bg_record->ba_mp_list);
+ if (!ba_mp) {
+ if (bg_record->mp_str) {
+ int j = 0, dim;
+ char *nodes = bg_record->mp_str;
+ uint16_t coords[SYSTEM_DIMENSIONS];
+ while (nodes[j] != '\0') {
+ if ((nodes[j] >= '0' && nodes[j] <= '9')
+ || (nodes[j] >= 'A' && nodes[j] <= 'Z')) {
+ break;
+ }
+ j++;
+ }
+ if (nodes[j] && ((strlen(nodes)
+ - (j + SYSTEM_DIMENSIONS)) >= 0)) {
+ for (dim = 0; dim < SYSTEM_DIMENSIONS;
+ dim++, j++)
+ coords[dim] = select_char2coord(
+ nodes[j]);
+ ba_mp = coord2ba_mp(coords);
+ }
+ error("you gave me a list with no ba_mps using %s",
+ ba_mp->coord_str);
+ } else {
+ ba_mp = coord2ba_mp(found_record->start);
+ error("you gave me a record with no ba_mps "
+ "and no nodes either using %s",
+ ba_mp->coord_str);
+ }
+ }
+
+ xassert(ba_mp);
+
+ new_ba_mp = ba_copy_mp(ba_mp);
+ slurm_mutex_unlock(&ba_system_mutex);
+ /* We need to have this node wrapped in Q to handle
+ wires correctly when creating around the midplane.
+ */
+ ba_setup_mp(new_ba_mp, false, true);
+
+ new_ba_mp->used = BA_MP_USED_TRUE;
+
+ /* Create these now so we can deal with error cnodes if/when
+ they happen. Since this is the easiest place to figure it
+ out for blocks that don't use the entire block */
+ if ((new_ba_mp->cnode_bitmap =
+ ba_create_ba_mp_cnode_bitmap(found_record))) {
+ new_ba_mp->cnode_err_bitmap = bit_alloc(bg_conf->mp_cnode_cnt);
+ new_ba_mp->cnode_usable_bitmap =
+ bit_copy(new_ba_mp->cnode_bitmap);
+ }
+
+ list_append(found_record->ba_mp_list, new_ba_mp);
+ found_record->mp_count = 1;
+ found_record->mp_str = xstrdup_printf(
+ "%s%s",
+ bg_conf->slurm_node_prefix, new_ba_mp->coord_str);
+
+ process_nodes(found_record, false);
+
+ /* Force small blocks to always be non-full system blocks.
+ * This really only plays a part on sub-midplane systems. */
+ found_record->full_block = 0;
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
info("made small block of %s[%s]",
found_record->mp_str, found_record->ionode_str);
+
return found_record;
}
@@ -740,7 +783,7 @@
static int _breakup_blocks(List block_list, List new_blocks,
select_ba_request_t *request, List my_block_list,
- bool only_free, bool only_small)
+ int cnodes, bool only_free, bool only_small)
{
int rc = SLURM_ERROR;
bg_record_t *bg_record = NULL;
@@ -748,7 +791,6 @@
int total_cnode_cnt=0;
char start_char[SYSTEM_DIMENSIONS+1];
bitstr_t *ionodes = bit_alloc(bg_conf->ionodes_per_mp);
- int cnodes = request->procs / bg_conf->cpu_ratio;
int curr_mp_bit = -1;
int dim;
@@ -795,7 +837,8 @@
continue;
}
/* never look at a block if a job is running */
- if (bg_record->job_running != NO_JOB_RUNNING)
+ if ((bg_record->job_running != NO_JOB_RUNNING)
+ || (bg_record->job_list && list_count(bg_record->job_list)))
continue;
/* on the third time through look for just a block
* that isn't used */
diff --git a/src/plugins/select/bluegene/bg_enums.h b/src/plugins/select/bluegene/bg_enums.h
index b47ee52..4f4fc29 100644
--- a/src/plugins/select/bluegene/bg_enums.h
+++ b/src/plugins/select/bluegene/bg_enums.h
@@ -111,6 +111,13 @@
BG_JOB_ERROR //!< Job is in error status.
} bg_job_status_t;
+typedef enum {
+ BG_BLOCK_ACTION_NAV = 0,
+ BG_BLOCK_ACTION_NONE,
+ BG_BLOCK_ACTION_BOOT,
+ BG_BLOCK_ACTION_FREE
+} bg_block_action_t;
+
#define BG_BLOCK_ERROR_FLAG 0x1000 // Block is in error
@@ -126,7 +133,22 @@
#define BG_SWITCH_PASS 0x001C /* just passthough used */
#define BG_SWITCH_WRAPPED_PASS 0x001F /* all ports are in use, but no torus */
#define BG_SWITCH_TORUS 0x000F /* all ports are in use in a torus */
-#define BG_SWITCH_START 0x0200 /* modified from the start list */
+
+#define BG_SWITCH_CABLE_ERROR 0x0100 /* Flag to notify cable is in a
+ * error state.
+ */
+#define BG_SWITCH_CABLE_ERROR_SET 0x0200 /* If a cable goes into an error
+ * state we set the cable in
+ * an error and the OUT_PASS
+ * as well.
+ * Currently SLURM only really
+ * cares about the out port of a
+ * switch.
+ */
+#define BG_SWITCH_CABLE_ERROR_FULL 0x0300 /* Used to clear both
+ * BG_SWITCH_CABLE_ERROR
+ * && BG_SWITCH_CABLE_ERROR_SET
+ */
/*
* Total time to boot a bglblock should not exceed
@@ -167,6 +189,7 @@
BG_ERROR_INVALID_INPUT,
BG_ERROR_INCONSISTENT_DATA,
BG_ERROR_NO_IOBLOCK_CONNECTED,
+ BG_ERROR_FREE,
} bg_errno_t;
#endif /* #ifndef ATTACH_BG_H */
diff --git a/src/plugins/select/bluegene/bg_job_info.c b/src/plugins/select/bluegene/bg_job_info.c
index 0a84ef4..3f0eed0 100644
--- a/src/plugins/select/bluegene/bg_job_info.c
+++ b/src/plugins/select/bluegene/bg_job_info.c
@@ -40,6 +40,7 @@
#include "bg_core.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
+#include "src/common/uid.h"
static char *_yes_no_string(uint16_t inx)
{
@@ -68,10 +69,11 @@
jobinfo->reboot = (uint16_t) NO_VAL;
jobinfo->rotate = (uint16_t) NO_VAL;
jobinfo->magic = JOBINFO_MAGIC;
- jobinfo->block_cnode_cnt = NO_VAL;
- jobinfo->cnode_cnt = NO_VAL;
+ jobinfo->block_cnode_cnt = 0;
+ jobinfo->cnode_cnt = 0;
/* This bitstr is created when used. */
+ //jobinfo->units_avail = bit_alloc(bg_conf->mp_cnode_cnt);
//jobinfo->units_used = bit_alloc(bg_conf->mp_cnode_cnt);
/* Remainder of structure is already NULL fulled */
@@ -100,7 +102,9 @@
xfree(jobinfo->linuximage);
xfree(jobinfo->mloaderimage);
xfree(jobinfo->ramdiskimage);
+ FREE_NULL_BITMAP(jobinfo->units_avail);
FREE_NULL_BITMAP(jobinfo->units_used);
+ xfree(jobinfo->user_name);
xfree(jobinfo);
}
return rc;
@@ -117,6 +121,7 @@
int i, rc = SLURM_SUCCESS;
uint16_t *uint16 = (uint16_t *) data;
uint32_t *uint32 = (uint32_t *) data;
+ uid_t *uid = (uid_t *)data;
char *tmp_char = (char *) data;
bg_record_t *bg_record = (bg_record_t *) data;
uint32_t new_size;
@@ -142,12 +147,15 @@
for (i=0; i<jobinfo->dim_cnt; i++) {
jobinfo->geometry[i] = uint16[i];
new_size *= uint16[i];
-
/* Make sure the conn type is correct with the
* new count */
- if ((new_size > 1)
- && (first_conn_type >= SELECT_SMALL))
- jobinfo->conn_type[i] = SELECT_TORUS;
+ if (new_size > 1) {
+ if (first_conn_type != (uint16_t)NO_VAL)
+ jobinfo->conn_type[i] = SELECT_NAV;
+ else if (first_conn_type >= SELECT_SMALL)
+ jobinfo->conn_type[i] =
+ bg_conf->default_conn_type[i];
+ }
}
break;
@@ -204,13 +212,19 @@
case SELECT_JOBDATA_NODE_CNT:
jobinfo->cnode_cnt = *uint32;
/* Make sure the conn type is correct with the new count */
- if ((bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)
+ if (bg_conf->sub_mp_sys
|| (jobinfo->cnode_cnt < bg_conf->mp_cnode_cnt)) {
if (jobinfo->conn_type[0] < SELECT_SMALL)
jobinfo->conn_type[0] = SELECT_SMALL;
- } else if (jobinfo->conn_type[0] >= SELECT_SMALL)
- for (i=0; i<SYSTEM_DIMENSIONS; i++)
- jobinfo->conn_type[i] = SELECT_TORUS;
+ } else if (jobinfo->conn_type[0] >= SELECT_SMALL) {
+ for (i=0; i<SYSTEM_DIMENSIONS; i++) {
+ if (jobinfo->conn_type[i] == (uint16_t)NO_VAL)
+ jobinfo->conn_type[i] = SELECT_NAV;
+ else
+ jobinfo->conn_type[i] =
+ bg_conf->default_conn_type[i];
+ }
+ }
break;
case SELECT_JOBDATA_ALTERED:
jobinfo->altered = *uint16;
@@ -239,6 +253,10 @@
for (i=0; i<jobinfo->dim_cnt; i++)
jobinfo->start_loc[i] = uint16[i];
break;
+ case SELECT_JOBDATA_USER_NAME:
+ xfree(jobinfo->user_name);
+ jobinfo->user_name = uid_to_string(*uid);
+ break;
default:
debug("set_select_jobinfo: data_type %d invalid",
data_type);
@@ -357,6 +375,13 @@
uint16[i] = jobinfo->start_loc[i];
}
break;
+ case SELECT_JOBDATA_USER_NAME:
+ if ((jobinfo->user_name == NULL)
+ || (jobinfo->user_name[0] == '\0'))
+ *tmp_char = NULL;
+ else
+ *tmp_char = xstrdup(jobinfo->user_name);
+ break;
default:
debug2("get_jobinfo data_type %d invalid",
data_type);
@@ -400,8 +425,11 @@
rc->linuximage = xstrdup(jobinfo->linuximage);
rc->mloaderimage = xstrdup(jobinfo->mloaderimage);
rc->ramdiskimage = xstrdup(jobinfo->ramdiskimage);
+ if (jobinfo->units_avail)
+ rc->units_avail = bit_copy(jobinfo->units_avail);
if (jobinfo->units_used)
rc->units_used = bit_copy(jobinfo->units_used);
+ rc->user_name = xstrdup(jobinfo->user_name);
}
return rc;
@@ -420,7 +448,68 @@
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
int dims = slurmdb_setup_cluster_dims();
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ if (jobinfo) {
+ if (jobinfo->dim_cnt)
+ dims = jobinfo->dim_cnt;
+ else if (bg_recover != NOT_FROM_CONTROLLER)
+ xassert(0);
+
+ pack16(dims, buffer);
+ /* NOTE: If new elements are added here, make sure to
+ * add equivalant pack of zeros below for NULL
+ * pointer */
+ for (i=0; i<dims; i++) {
+ pack16(jobinfo->geometry[i], buffer);
+ pack16(jobinfo->conn_type[i], buffer);
+ pack16(jobinfo->start_loc[i], buffer);
+ }
+ pack16(jobinfo->reboot, buffer);
+ pack16(jobinfo->rotate, buffer);
+
+ pack32(jobinfo->block_cnode_cnt, buffer);
+ pack32(jobinfo->cnode_cnt, buffer);
+
+ packstr(jobinfo->bg_block_id, buffer);
+ packstr(jobinfo->mp_str, buffer);
+ packstr(jobinfo->ionode_str, buffer);
+
+ packstr(jobinfo->blrtsimage, buffer);
+ packstr(jobinfo->linuximage, buffer);
+ packstr(jobinfo->mloaderimage, buffer);
+ packstr(jobinfo->ramdiskimage, buffer);
+ if (bg_conf) {
+ pack16(bg_conf->mp_cnode_cnt, buffer);
+ pack_bit_fmt(jobinfo->units_avail, buffer);
+ pack_bit_fmt(jobinfo->units_used, buffer);
+ } else {
+ pack16(0, buffer);
+ packnull(buffer);
+ packnull(buffer);
+ }
+ } else {
+ pack16(dims, buffer);
+ /* pack space for 3 positions for geo
+ * conn_type and start_loc and then, reboot, and rotate
+ */
+ for (i=0; i<((dims*3)+2); i++) {
+ pack16((uint16_t) 0, buffer);
+ }
+ pack32((uint32_t) 0, buffer); //block_cnode_cnt
+ pack32((uint32_t) 0, buffer); //cnode_cnt
+ packnull(buffer); //bg_block_id
+ packnull(buffer); //nodes
+ packnull(buffer); //ionodes
+
+ packnull(buffer); //blrts
+ packnull(buffer); //linux
+ packnull(buffer); //mloader
+ packnull(buffer); //ramdisk
+ pack16((uint16_t) 0, buffer); //mp_cnode_cnt
+ packnull(buffer); //units_avail
+ packnull(buffer); //units_used
+ }
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
if (jobinfo) {
if (jobinfo->dim_cnt)
dims = jobinfo->dim_cnt;
@@ -573,6 +662,7 @@
{
int i;
uint32_t uint32_tmp;
+ uint16_t mp_cnode_cnt;
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
int dims = slurmdb_setup_cluster_dims();
select_jobinfo_t *jobinfo = xmalloc(sizeof(struct select_jobinfo));
@@ -581,7 +671,52 @@
jobinfo->magic = JOBINFO_MAGIC;
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpack16(&jobinfo->dim_cnt, buffer);
+
+ xassert(jobinfo->dim_cnt);
+ dims = jobinfo->dim_cnt;
+
+ for (i=0; i<dims; i++) {
+ safe_unpack16(&(jobinfo->geometry[i]), buffer);
+ safe_unpack16(&(jobinfo->conn_type[i]), buffer);
+ safe_unpack16(&(jobinfo->start_loc[i]), buffer);
+ }
+
+ safe_unpack16(&(jobinfo->reboot), buffer);
+ safe_unpack16(&(jobinfo->rotate), buffer);
+
+ safe_unpack32(&(jobinfo->block_cnode_cnt), buffer);
+ safe_unpack32(&(jobinfo->cnode_cnt), buffer);
+
+ safe_unpackstr_xmalloc(&(jobinfo->bg_block_id), &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&(jobinfo->mp_str), &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&(jobinfo->ionode_str), &uint32_tmp,
+ buffer);
+
+ safe_unpackstr_xmalloc(&(jobinfo->blrtsimage),
+ &uint32_tmp, buffer);
+ safe_unpackstr_xmalloc(&(jobinfo->linuximage), &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&(jobinfo->mloaderimage), &uint32_tmp,
+ buffer);
+ safe_unpackstr_xmalloc(&(jobinfo->ramdiskimage), &uint32_tmp,
+ buffer);
+ safe_unpack16(&mp_cnode_cnt, buffer);
+ safe_unpackstr_xmalloc(&bit_char, &uint32_tmp, buffer);
+ if (bit_char) {
+ jobinfo->units_avail = bit_alloc(mp_cnode_cnt);
+ bit_unfmt(jobinfo->units_avail, bit_char);
+ xfree(bit_char);
+ }
+ safe_unpackstr_xmalloc(&bit_char, &uint32_tmp, buffer);
+ if (bit_char) {
+ jobinfo->units_used = bit_alloc(mp_cnode_cnt);
+ bit_unfmt(jobinfo->units_used, bit_char);
+ xfree(bit_char);
+ }
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
safe_unpack16(&jobinfo->dim_cnt, buffer);
xassert(jobinfo->dim_cnt);
@@ -696,6 +831,7 @@
char *tmp_image = "default";
char *header = "CONNECT REBOOT ROTATE GEOMETRY BLOCK_ID";
bool print_x = 1;
+ char *conn_type = NULL;
if (buf == NULL) {
error("sprint_jobinfo: buf is null");
@@ -728,8 +864,10 @@
else
xstrcat(geo, "0");
}
- } else if (mode != SELECT_PRINT_START_LOC)
+ } else if (mode != SELECT_PRINT_START_LOC) {
geo = give_geo(jobinfo->geometry, jobinfo->dim_cnt, print_x);
+ conn_type = conn_type_string_full(jobinfo->conn_type);
+ }
switch (mode) {
case SELECT_PRINT_HEAD:
@@ -738,7 +876,7 @@
case SELECT_PRINT_DATA:
snprintf(buf, size,
"%7.7s %6.6s %6.6s %s %-16s",
- conn_type_string(jobinfo->conn_type[0]),
+ conn_type,
_yes_no_string(jobinfo->reboot),
_yes_no_string(jobinfo->rotate),
geo,
@@ -748,7 +886,7 @@
snprintf(buf, size,
"Connection=%s Reboot=%s Rotate=%s "
"Geometry=%s",
- conn_type_string(jobinfo->conn_type[0]),
+ conn_type,
_yes_no_string(jobinfo->reboot),
_yes_no_string(jobinfo->rotate),
geo);
@@ -757,7 +895,7 @@
snprintf(buf, size,
"Connection=%s Reboot=%s Rotate=%s "
"Geometry=%s Block_ID=%s",
- conn_type_string(jobinfo->conn_type[0]),
+ conn_type,
_yes_no_string(jobinfo->reboot),
_yes_no_string(jobinfo->rotate),
geo,
@@ -774,8 +912,7 @@
snprintf(buf, size, "%s", jobinfo->mp_str);
break;
case SELECT_PRINT_CONNECTION:
- snprintf(buf, size, "%s",
- conn_type_string(jobinfo->conn_type[0]));
+ snprintf(buf, size, "%s", conn_type);
break;
case SELECT_PRINT_REBOOT:
snprintf(buf, size, "%s",
@@ -819,6 +956,7 @@
buf[0] = '\0';
}
xfree(geo);
+ xfree(conn_type);
return buf;
}
@@ -835,6 +973,7 @@
char *buf = NULL;
char *header = "CONNECT REBOOT ROTATE GEOMETRY BLOCK_ID";
bool print_x = 1;
+ char *conn_type = NULL;
if ((mode != SELECT_PRINT_DATA)
&& jobinfo && (jobinfo->magic != JOBINFO_MAGIC)) {
@@ -861,9 +1000,10 @@
else
xstrcat(geo, "0");
}
- } else if (mode != SELECT_PRINT_START_LOC)
+ } else if (mode != SELECT_PRINT_START_LOC) {
geo = give_geo(jobinfo->geometry, jobinfo->dim_cnt, print_x);
-
+ conn_type = conn_type_string_full(jobinfo->conn_type);
+ }
switch (mode) {
case SELECT_PRINT_HEAD:
xstrcat(buf, header);
@@ -871,7 +1011,7 @@
case SELECT_PRINT_DATA:
xstrfmtcat(buf,
"%7.7s %6.6s %6.6s %s %-16s",
- conn_type_string(jobinfo->conn_type[0]),
+ conn_type,
_yes_no_string(jobinfo->reboot),
_yes_no_string(jobinfo->rotate),
geo,
@@ -881,7 +1021,7 @@
xstrfmtcat(buf,
"Connection=%s Reboot=%s Rotate=%s "
"Geometry=%s Block_ID=%s",
- conn_type_string(jobinfo->conn_type[0]),
+ conn_type,
_yes_no_string(jobinfo->reboot),
_yes_no_string(jobinfo->rotate),
geo,
@@ -898,8 +1038,7 @@
xstrfmtcat(buf, "%s", jobinfo->mp_str);
break;
case SELECT_PRINT_CONNECTION:
- xstrfmtcat(buf, "%s",
- conn_type_string(jobinfo->conn_type[0]));
+ xstrfmtcat(buf, "%s", conn_type);
break;
case SELECT_PRINT_REBOOT:
xstrfmtcat(buf, "%s",
@@ -941,5 +1080,6 @@
error("xstrdup_jobinfo: bad mode %d", mode);
}
xfree(geo);
+ xfree(conn_type);
return buf;
}
diff --git a/src/plugins/select/bluegene/bg_job_info.h b/src/plugins/select/bluegene/bg_job_info.h
index 4183287..b22aadf 100644
--- a/src/plugins/select/bluegene/bg_job_info.h
+++ b/src/plugins/select/bluegene/bg_job_info.h
@@ -76,8 +76,12 @@
uint16_t rotate; /* permit geometry rotation if set */
uint16_t start_loc[HIGHEST_DIMENSIONS]; /* where in block we
* are starting from */
- bitstr_t *units_used; /* Used for a step the cnodes used.
- */
+ bitstr_t *units_avail; /* Used for a job allocation in a sub block,
+ the cnodes available to be used are
+ set. */
+ bitstr_t *units_used; /* bit not set are available for running. */
+ char *user_name; /* user name of the user_id only used
+ * when running multiple jobs per block */
};
/* allocate storage for a select job credential
diff --git a/src/plugins/select/bluegene/bg_job_place.c b/src/plugins/select/bluegene/bg_job_place.c
index 5f609d3..cb463b3 100644
--- a/src/plugins/select/bluegene/bg_job_place.c
+++ b/src/plugins/select/bluegene/bg_job_place.c
@@ -6,6 +6,7 @@
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Dan Phung <phung4@llnl.gov> and Morris Jette <jette1@llnl.gov>
+ * and Danny Auble <da@schedmd.com>
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.schedmd.com/slurmdocs/>.
@@ -288,7 +289,12 @@
bg_record_t *bg_record = NULL;
ListIterator itr = NULL;
char tmp_char[256];
-
+ int dim = 0;
+#ifdef HAVE_BG_L_P
+ int conn_type_dims = 1;
+#else
+ int conn_type_dims = SYSTEM_DIMENSIONS;
+#endif
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
info("number of blocks to check: %d state %d "
"asking for %u-%u cpus",
@@ -311,7 +317,7 @@
|| ((!SELECT_IS_CHECK_FULL_SET(query_mode)
|| SELECT_IS_MODE_RUN_NOW(query_mode))
&& (bg_conf->layout_mode != LAYOUT_DYNAMIC))) {
- if (bg_record->free_cnt) {
+ if (bg_record->destroy) {
/* No reason to look at a block that
is being freed unless we are
running static and looking at the
@@ -319,8 +325,24 @@
*/
if (bg_conf->slurm_debug_flags
& DEBUG_FLAG_BG_PICK)
- info("block %s being free for other "
- "job(s), skipping",
+ info("block %s being destroyed, "
+ "skipping",
+ bg_record->bg_block_id);
+ continue;
+ } else if ((bg_record->action == BG_BLOCK_ACTION_FREE)
+ && (bg_record->state == BG_BLOCK_INITED)) {
+ /* If we are in the action state of
+ FREE of 'D' continue on and don't
+ look at this block just yet. Only
+ do this if the block is still
+ booted since the action happens on
+ a regular free as well.
+ */
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK)
+ info("block %s can't be used, "
+ "it has an action item of 'D' "
+ "on it.",
bg_record->bg_block_id);
continue;
} else if ((bg_record->job_running == BLOCK_ERROR_STATE)
@@ -335,9 +357,8 @@
"state (can't use)",
bg_record->bg_block_id);
continue;
- } else if ((bg_record->job_running != NO_JOB_RUNNING)
- && (bg_record->job_running
- != job_ptr->job_id)) {
+ } else if (bg_record->job_ptr
+ && (bg_record->job_ptr != job_ptr)) {
/* Look here if you are trying to run now or
if you aren't looking at the full set. We
don't continue on running blocks for the
@@ -346,11 +367,85 @@
*/
if (bg_conf->slurm_debug_flags
& DEBUG_FLAG_BG_PICK)
- info("block %s in use by %s job %d",
+ info("block %s in use by %d job %d",
bg_record->bg_block_id,
- bg_record->user_name,
- bg_record->job_running);
+ bg_record->job_ptr->user_id,
+ bg_record->job_ptr->job_id);
continue;
+ } else if (bg_record->err_ratio) {
+ bg_record_t *found_record = NULL;
+ slurm_mutex_lock(&block_state_mutex);
+
+ if (bg_record->original)
+ found_record =
+ bg_record->original;
+ else
+ found_record =
+ find_org_in_bg_list(
+ bg_lists->main,
+ bg_record);
+ if (!found_record)
+ found_record = bg_record;
+
+ /* We have to use the original record
+ here to avoid missing jobs that
+ perhaps were removed to see if a
+ job would run or if we were doing
+ preemption.
+ */
+ if (!found_record->job_ptr
+ && (!found_record->job_list ||
+ !list_count(found_record->job_list))) {
+
+ if (found_record->free_cnt)
+ slurm_mutex_unlock(
+ &block_state_mutex);
+ else {
+ List tmp_list =
+ list_create(NULL);
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK)
+ info("going to free "
+ "block %s "
+ "there are no "
+ "jobs running. "
+ "This will only "
+ "happen if the "
+ "cnodes went into "
+ "error after no "
+ "jobs were "
+ "running.",
+ bg_record->
+ bg_block_id);
+
+ list_push(tmp_list,
+ found_record);
+ slurm_mutex_unlock(
+ &block_state_mutex);
+ free_block_list(NO_VAL,
+ tmp_list, 0, 0);
+ list_destroy(tmp_list);
+ }
+ } else if (found_record->err_ratio
+ >= bg_conf->max_block_err) {
+ slurm_mutex_unlock(&block_state_mutex);
+ /* This means the block is higher than
+ the given max_block_err defined in
+ the bluegene.conf.
+ */
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK)
+ info("block %s can't be used "
+ "anymore, %u%% of the "
+ "block is in error "
+ "state >= %u%%",
+ bg_record->bg_block_id,
+ bg_record->err_ratio,
+ bg_conf->max_block_err);
+ continue;
+ } else
+ slurm_mutex_unlock(&block_state_mutex);
+
}
}
@@ -358,18 +453,31 @@
if ((bg_record->cpu_cnt < request->procs)
|| ((max_cpus != NO_VAL)
&& (bg_record->cpu_cnt > max_cpus))) {
- /* We use the proccessor count per block here
- mostly to see if we can run on a smaller block.
+ /* If we are looking for a sub-block just pass
+ this by since we will usually be given a
+ larger block than our allocation request.
*/
- if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
- convert_num_unit((float)bg_record->cpu_cnt,
- tmp_char,
- sizeof(tmp_char), UNIT_NONE);
- info("block %s CPU count (%s) not suitable",
- bg_record->bg_block_id,
- tmp_char);
+ if ((bg_record->cpu_cnt < request->procs)
+ || !bg_conf->sub_blocks
+ || (bg_record->mp_count > 1)) {
+ /* We use the proccessor count per block here
+ mostly to see if we can run on a
+ smaller block.
+ */
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK) {
+ convert_num_unit(
+ (float)bg_record->cpu_cnt,
+ tmp_char,
+ sizeof(tmp_char), UNIT_NONE);
+ info("block %s CPU count (%u) "
+ "not suitable, asking for %u-%u",
+ bg_record->bg_block_id,
+ bg_record->cpu_cnt, request->procs,
+ max_cpus);
+ }
+ continue;
}
- continue;
}
/*
@@ -386,7 +494,8 @@
char *temp2 = bitmap2node_name(
slurm_block_bitmap);
info("bg block %s has nodes not "
- "usable by this job %s %s",
+ "usable by this job %s available "
+ "midplanes were %s",
bg_record->bg_block_id, temp, temp2);
xfree(temp);
xfree(temp2);
@@ -406,6 +515,24 @@
continue;
}
+#ifndef HAVE_BG_L_P
+ if (!SELECT_IS_TEST(query_mode)
+ && (bg_conf->layout_mode != LAYOUT_DYNAMIC)) {
+ /* make sure we don't have any bad cables.
+ * We need to reset the system with true here
+ * to reveal any bad cables. */
+ reset_ba_system(true);
+ if (check_and_set_mp_list(bg_record->ba_mp_list)
+ == SLURM_ERROR) {
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK)
+ info("bg block %s has unavailable "
+ "overlapping hardware.",
+ bg_record->bg_block_id);
+ continue;
+ }
+ }
+#endif
if (_check_for_booted_overlapping_blocks(
block_list, itr, bg_record,
overlap_check, overlapped_list, query_mode))
@@ -446,42 +573,58 @@
/***********************************************/
/* check the connection type specified matches */
/***********************************************/
- if ((request->conn_type[0] != bg_record->conn_type[0])
- && (request->conn_type[0] != SELECT_NAV)) {
-#ifdef HAVE_BGP
- if (request->conn_type[0] >= SELECT_SMALL) {
- /* we only want to reboot blocks if
- they have to be so skip booted
- blocks if in small state
- */
- if (check_image
- && (bg_record->state
- == BG_BLOCK_INITED)) {
- *allow = 1;
- continue;
+ for (dim=0; dim<conn_type_dims; dim++) {
+ if ((request->conn_type[dim]
+ != bg_record->conn_type[dim])
+ && (request->conn_type[dim] != SELECT_NAV)) {
+ if (request->conn_type[0] >= SELECT_SMALL) {
+ /* we only want to reboot blocks if
+ they have to be so skip booted
+ blocks if in small state
+ */
+ if (check_image
+ && (bg_record->state
+ == BG_BLOCK_INITED)) {
+ *allow = 1;
+ break;
+ }
+ goto good_conn_type;
+ } else if (bg_record->conn_type[0]
+ >= SELECT_SMALL) {
+ /* since we already checked to see if
+ the cpus were good this means we are
+ looking for a block in a range that
+ includes small and regular blocks.
+ So we can just continue on.
+ */
+ goto good_conn_type;
}
- goto good_conn_type;
- } else if (bg_record->conn_type[0] >= SELECT_SMALL) {
- /* since we already checked to see if
- the cpus were good this means we are
- looking for a block in a range that
- includes small and regular blocks.
- So we can just continue on.
- */
- goto good_conn_type;
+
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK) {
+ char *req_conn_type =
+ conn_type_string_full(
+ request->conn_type);
+ char *conn_type =
+ conn_type_string_full(
+ bg_record->conn_type);
+ info("bg block %s conn-type not usable "
+ "asking for %s bg_record is %s",
+ bg_record->bg_block_id,
+ req_conn_type,
+ conn_type);
+ xfree(req_conn_type);
+ xfree(conn_type);
+ }
+ break;
}
-#endif
- if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
- info("bg block %s conn-type not usable "
- "asking for %s bg_record is %s",
- bg_record->bg_block_id,
- conn_type_string(request->conn_type[0]),
- conn_type_string(bg_record->conn_type[0]));
- continue;
}
-#ifdef HAVE_BGP
+
+ if (dim != conn_type_dims)
+ continue;
+
good_conn_type:
-#endif
+
/*****************************************/
/* match up geometry as "best" possible */
/*****************************************/
@@ -490,6 +633,76 @@
request->rotate)))
continue;
+ if (bg_conf->sub_blocks && bg_record->mp_count == 1) {
+ select_jobinfo_t tmp_jobinfo, *jobinfo =
+ job_ptr->select_jobinfo->data;
+ bitstr_t *total_bitmap;
+ bool need_free = false;
+ ba_mp_t *ba_mp = list_peek(bg_record->ba_mp_list);
+
+ xassert(ba_mp);
+ xassert(ba_mp->cnode_bitmap);
+ xassert(ba_mp->cnode_usable_bitmap);
+
+ if (bg_record->err_ratio) {
+ xassert(ba_mp->cnode_err_bitmap);
+ total_bitmap = bit_copy(ba_mp->cnode_bitmap);
+ bit_or(total_bitmap, ba_mp->cnode_err_bitmap);
+ need_free = true;
+ } else
+ total_bitmap = ba_mp->cnode_bitmap;
+
+ memset(&tmp_jobinfo, 0, sizeof(select_jobinfo_t));
+ tmp_jobinfo.cnode_cnt = jobinfo->cnode_cnt;
+ if (!ba_sub_block_in_bitmap(
+ &tmp_jobinfo, total_bitmap, 0)) {
+ if (need_free)
+ FREE_NULL_BITMAP(total_bitmap);
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK) {
+ info("block %s does not have a "
+ "placement for a sub-block of "
+ "this size (%u) ",
+ bg_record->bg_block_id,
+ request->procs);
+ }
+ continue;
+ }
+
+ if (need_free)
+ FREE_NULL_BITMAP(total_bitmap);
+ /* Clear up what we just found if not running now. */
+ if (SELECT_IS_MODE_RUN_NOW(query_mode)) {
+ jobinfo->cnode_cnt = tmp_jobinfo.cnode_cnt;
+ jobinfo->dim_cnt = tmp_jobinfo.dim_cnt;
+
+ if (jobinfo->units_avail)
+ FREE_NULL_BITMAP(jobinfo->units_avail);
+ jobinfo->units_avail = tmp_jobinfo.units_avail;
+ tmp_jobinfo.units_avail = NULL;
+
+ if (jobinfo->units_used)
+ FREE_NULL_BITMAP(jobinfo->units_used);
+ jobinfo->units_used = tmp_jobinfo.units_used;
+ tmp_jobinfo.units_used = NULL;
+
+ xfree(jobinfo->ionode_str);
+ jobinfo->ionode_str = tmp_jobinfo.ionode_str;
+ tmp_jobinfo.ionode_str = NULL;
+
+ memcpy(jobinfo->geometry, tmp_jobinfo.geometry,
+ sizeof(jobinfo->geometry));
+ memcpy(jobinfo->start_loc,
+ tmp_jobinfo.start_loc,
+ sizeof(jobinfo->start_loc));
+
+ }
+
+ FREE_NULL_BITMAP(tmp_jobinfo.units_avail);
+ FREE_NULL_BITMAP(tmp_jobinfo.units_used);
+ xfree(tmp_jobinfo.ionode_str);
+ }
+
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
info("we found one! %s", bg_record->bg_block_id);
break;
@@ -499,6 +712,49 @@
return bg_record;
}
+/* job_write_lock and block_state_mutex should be locked before this */
+static List _handle_jobs_unusable_block(bg_record_t *bg_record)
+{
+ kill_job_struct_t *freeit = NULL;
+ List kill_job_list = NULL;
+ /* We need to make sure if a job is running here to not
+ call the regular method since we are inside the job write
+ lock already.
+ */
+ if (bg_record->job_ptr && !IS_JOB_FINISHED(bg_record->job_ptr)) {
+ info("Somehow block %s is being freed, but appears "
+ "to already have a job %u(%u) running on it.",
+ bg_record->bg_block_id,
+ bg_record->job_ptr->job_id,
+ bg_record->job_running);
+ kill_job_list = bg_status_create_kill_job_list();
+ freeit = (kill_job_struct_t *)xmalloc(sizeof(freeit));
+ freeit->jobid = bg_record->job_ptr->job_id;
+ list_push(kill_job_list, freeit);
+ } else if (bg_record->job_list && list_count(bg_record->job_list)) {
+ ListIterator itr = list_iterator_create(bg_record->job_list);
+ struct job_record *job_ptr = NULL;
+ while ((job_ptr = list_next(itr))) {
+ if (IS_JOB_FINISHED(job_ptr))
+ continue;
+ info("Somehow block %s is being freed, but appears "
+ "to already have a job %u(%u) running on it.",
+ bg_record->bg_block_id,
+ job_ptr->job_id,
+ bg_record->job_running);
+ if (!kill_job_list)
+ kill_job_list =
+ bg_status_create_kill_job_list();
+ freeit = (kill_job_struct_t *)xmalloc(sizeof(freeit));
+ freeit->jobid = bg_record->job_ptr->job_id;
+ list_push(kill_job_list, freeit);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ return kill_job_list;
+}
+
static int _check_for_booted_overlapping_blocks(
List block_list, ListIterator bg_record_itr,
bg_record_t *bg_record, int overlap_check, List overlapped_list,
@@ -543,7 +799,9 @@
*/
if (is_test && overlapped_list
&& found_record->job_ptr
- && bg_record->job_running == NO_JOB_RUNNING) {
+ && ((bg_record->job_running == NO_JOB_RUNNING)
+ && (!bg_record->job_list
+ || !list_count(bg_record->job_list)))) {
ListIterator itr = list_iterator_create(
overlapped_list);
bg_record_t *tmp_rec = NULL;
@@ -600,6 +858,8 @@
|| SELECT_IS_MODE_RUN_NOW(query_mode))
&& (bg_conf->layout_mode != LAYOUT_DYNAMIC)))
&& ((found_record->job_running != NO_JOB_RUNNING)
+ || (found_record->job_list
+ && list_count(found_record->job_list))
|| (found_record->state
& BG_BLOCK_ERROR_FLAG))) {
if ((found_record->job_running
@@ -623,6 +883,7 @@
if (bg_conf->layout_mode == LAYOUT_DYNAMIC) {
List tmp_list = list_create(NULL);
+ List kill_job_list = NULL;
/* this will remove and
* destroy the memory for
* bg_record
@@ -673,47 +934,19 @@
destroy_bg_record(bg_record);
list_push(tmp_list, found_record);
+
+ kill_job_list =
+ _handle_jobs_unusable_block(
+ found_record);
+
slurm_mutex_unlock(&block_state_mutex);
- /* We need to make sure if a
- job is running here to not
- call the regular method since
- we are inside the job write
- lock already.
- */
- if (found_record->job_ptr
- && !IS_JOB_FINISHED(
- found_record->job_ptr)) {
- info("Somehow block %s "
- "is being freed, but "
- "appears to already have "
- "a job %u(%u) running "
- "on it.",
- found_record->bg_block_id,
- found_record->
- job_ptr->job_id,
- found_record->job_running);
- if (job_requeue(0,
- found_record->
- job_ptr->job_id,
- -1,
- (uint16_t)
- NO_VAL,
- false)) {
- error("Couldn't "
- "requeue job %u, "
- "failing it: %s",
- found_record->
- job_ptr->job_id,
- slurm_strerror(
- rc));
- job_fail(found_record->
- job_ptr->
- job_id);
- }
+ if (kill_job_list) {
+ bg_status_process_kill_job_list(
+ kill_job_list, 1);
+ list_destroy(kill_job_list);
}
-
- free_block_list(NO_VAL, tmp_list, 0, 0);
+ free_block_list(NO_VAL, tmp_list, 1, 0);
list_destroy(tmp_list);
}
rc = 1;
@@ -855,6 +1088,28 @@
return rc;
}
+
+/* Return the last finishing job on a shared block */
+static struct job_record *_get_last_job(bg_record_t *bg_record)
+{
+ struct job_record *found_job_ptr;
+ struct job_record *last_job_ptr;
+
+ ListIterator job_list_itr = NULL;
+
+ xassert(bg_record->job_list);
+
+ job_list_itr = list_iterator_create(bg_record->job_list);
+ last_job_ptr = list_next(job_list_itr);
+ while ((found_job_ptr = list_next(job_list_itr))) {
+ if (found_job_ptr->end_time > last_job_ptr->end_time)
+ last_job_ptr = found_job_ptr;
+ }
+ list_iterator_destroy(job_list_itr);
+
+ return last_job_ptr;
+}
+
/*
* finds the best match for a given job request
*
@@ -919,8 +1174,13 @@
get_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_CONN_TYPE, &request.conn_type);
- get_select_jobinfo(job_ptr->select_jobinfo->data,
- SELECT_JOBDATA_GEOMETRY, &req_geometry);
+
+ if (req_procs <= bg_conf->cpus_per_mp)
+ req_geometry[0] = (uint16_t)NO_VAL;
+ else
+ get_select_jobinfo(job_ptr->select_jobinfo->data,
+ SELECT_JOBDATA_GEOMETRY, &req_geometry);
+
get_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_ROTATE, &request.rotate);
@@ -1020,7 +1280,6 @@
/* set the bitmap and do other allocation activities */
if (bg_record) {
-#ifdef HAVE_BG_L_P
if (!is_test) {
if (bridge_block_check_mp_states(
bg_record->bg_block_id, 1)
@@ -1045,7 +1304,7 @@
continue;
}
}
-#endif
+
format_node_name(bg_record, tmp_char, sizeof(tmp_char));
debug("_find_best_block_match %s <%s>",
@@ -1102,7 +1361,9 @@
*/
itr = list_iterator_create(block_list);
while ((bg_record = list_next(itr))) {
- if (bg_record->job_running != NO_JOB_RUNNING)
+ if ((bg_record->job_running != NO_JOB_RUNNING)
+ || (bg_record->job_list
+ && list_count(bg_record->job_list)))
list_append(job_list, bg_record);
/* Since the error blocks are at the
end we only really need to look at
@@ -1159,6 +1420,26 @@
*/
bg_record->job_running =
NO_JOB_RUNNING;
+ } else if (bg_record->job_list
+ && list_count(bg_record->
+ job_list)) {
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK)
+ info("taking off "
+ "%d jobs that "
+ "are running on "
+ "%s",
+ list_count(
+ bg_record->
+ job_list),
+ bg_record->
+ bg_block_id);
+ /* bg_record->job_running
+ isn't used when we use
+ job lists, so no need
+ to set it to
+ NO_JOB_RUNNING.
+ */
} else if ((bg_record->job_running
== BLOCK_ERROR_STATE)
&& (bg_conf->
@@ -1168,7 +1449,7 @@
"which is in an "
"error state",
bg_record->bg_block_id);
- } else
+ } else {
/* This means we didn't have
any jobs to take off
anymore so we are making
@@ -1176,7 +1457,8 @@
node on the system.
*/
track_down_nodes = false;
-
+ request.full_check = true;
+ }
if (!(new_blocks = create_dynamic_block(
block_list, &request, job_list,
track_down_nodes))) {
@@ -1231,10 +1513,21 @@
(*found_bg_record)->mp_bitmap);
if (bg_record) {
- (*found_bg_record)->job_running =
- bg_record->job_running;
- (*found_bg_record)->job_ptr
- = bg_record->job_ptr;
+ if (bg_record->job_list &&
+ list_count(bg_record->job_list)) {
+ (*found_bg_record)->job_ptr =
+ _get_last_job(
+ bg_record);
+ (*found_bg_record)->job_running
+ = (*found_bg_record)->
+ job_ptr->job_id;
+ } else {
+ (*found_bg_record)->job_running
+ = bg_record->
+ job_running;
+ (*found_bg_record)->job_ptr
+ = bg_record->job_ptr;
+ }
}
list_destroy(new_blocks);
break;
@@ -1276,7 +1569,8 @@
then we don't need to add either, (since it is
already in the list).
*/
- if (!new_record->bg_block_id || new_record->original)
+ if ((new_record->magic != BLOCK_MAGIC)
+ || !new_record->bg_block_id || new_record->original)
continue;
list_remove(itr);
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
@@ -1293,17 +1587,20 @@
return count;
}
-static void _build_select_struct(struct job_record *job_ptr,
- bitstr_t *bitmap, uint32_t node_cnt)
+static void _build_job_resources_struct(
+ struct job_record *job_ptr, bitstr_t *bitmap, bg_record_t *bg_record)
{
int i;
- uint32_t total_cpus = 0;
job_resources_t *job_resrcs_ptr;
+ select_jobinfo_t *jobinfo = job_ptr->select_jobinfo->data;
+ uint32_t node_cnt = jobinfo->cnode_cnt;
xassert(job_ptr);
if (job_ptr->job_resrcs) {
- error("select_p_job_test: already have select_job");
+ error("_build_job_resources_struct: already have job_resouces "
+ "for job %u",
+ job_ptr->job_id);
free_job_resources(&job_ptr->job_resrcs);
}
@@ -1314,57 +1611,81 @@
job_resrcs_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt);
/* job_resrcs_ptr->nhosts = node_cnt; */
job_resrcs_ptr->nhosts = bit_set_count(bitmap);
- job_resrcs_ptr->ncpus = job_ptr->details->min_cpus;
- job_resrcs_ptr->node_bitmap = bit_copy(bitmap);
- job_resrcs_ptr->nodes = bitmap2node_name(bitmap);
- if (job_resrcs_ptr->node_bitmap == NULL)
+
+ if (!(job_resrcs_ptr->node_bitmap = bit_copy(bitmap)))
fatal("bit_copy malloc failure");
+ job_resrcs_ptr->nodes = xstrdup(bg_record->mp_str);
+
job_resrcs_ptr->cpu_array_cnt = 1;
job_resrcs_ptr->cpu_array_value[0] = bg_conf->cpu_ratio;
job_resrcs_ptr->cpu_array_reps[0] = node_cnt;
- total_cpus = bg_conf->cpu_ratio * node_cnt;
+ job_resrcs_ptr->ncpus = job_ptr->total_cpus =
+ job_ptr->cpu_cnt = job_ptr->details->min_cpus =
+ bg_conf->cpu_ratio * node_cnt;
for (i=0; i<node_cnt; i++)
job_resrcs_ptr->cpus[i] = bg_conf->cpu_ratio;
-
- if (job_resrcs_ptr->ncpus != total_cpus) {
- error("select_p_job_test: ncpus mismatch %u != %u",
- job_resrcs_ptr->ncpus, total_cpus);
- }
}
static List _get_preemptables(uint16_t query_mode, bg_record_t *bg_record,
- List preempt_jobs)
+ struct job_record *in_job_ptr, List preempt_jobs)
{
List preempt = NULL;
- ListIterator itr;
ListIterator job_itr;
bg_record_t *found_record;
struct job_record *job_ptr;
+ select_jobinfo_t *in_jobinfo = in_job_ptr->select_jobinfo->data;
xassert(bg_record);
+ xassert(in_job_ptr);
xassert(preempt_jobs);
- preempt = list_create(NULL);
slurm_mutex_lock(&block_state_mutex);
job_itr = list_iterator_create(preempt_jobs);
- itr = list_iterator_create(bg_lists->main);
- while ((found_record = list_next(itr))) {
- if (!found_record->job_ptr
- || (!found_record->bg_block_id)
- || (bg_record == found_record)
+ while ((job_ptr = list_next(job_itr))) {
+ select_jobinfo_t *jobinfo = job_ptr->select_jobinfo->data;
+ found_record = jobinfo->bg_record;
+
+ if (!found_record->bg_block_id || (bg_record == found_record)
|| !blocks_overlap(bg_record, found_record))
continue;
- while ((job_ptr = list_next(job_itr))) {
- if (job_ptr == found_record->job_ptr)
+ if (found_record->job_list) {
+ struct job_record *job_ptr2;
+ ListIterator job_itr2 = list_iterator_create(
+ found_record->job_list);
+ while ((job_ptr2 = list_next(job_itr2))) {
+ if (job_ptr != job_ptr2)
+ continue;
+ if (in_jobinfo->units_avail) {
+ if (!bit_overlap(
+ in_jobinfo->units_avail,
+ jobinfo->units_avail)) {
+ debug2("skipping unoverlapping "
+ "%u", job_ptr->job_id);
+ continue;
+ }
+ }
break;
+ }
+ list_iterator_destroy(job_itr2);
+
+ /* We might of already gotten all we needed
+ off this block.
+ */
+ if (!job_ptr2)
+ continue;
}
+
if (job_ptr) {
+ if (!preempt)
+ preempt = list_create(NULL);
list_push(preempt, job_ptr);
-/* info("going to preempt %u running on %s", */
-/* job_ptr->job_id, found_record->bg_block_id); */
+ if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
+ info("going to preempt %u running on %s",
+ job_ptr->job_id,
+ found_record->bg_block_id);
} else if (SELECT_IS_MODE_RUN_NOW(query_mode)) {
error("Job %u running on block %s "
"wasn't in the preempt list, but needs to be "
@@ -1372,13 +1693,13 @@
found_record->job_ptr->job_id,
found_record->bg_block_id,
bg_record->bg_block_id);
- list_destroy(preempt);
- preempt = NULL;
+ if (preempt) {
+ list_destroy(preempt);
+ preempt = NULL;
+ }
break;
}
- list_iterator_reset(job_itr);
}
- list_iterator_destroy(itr);
list_iterator_destroy(job_itr);
slurm_mutex_unlock(&block_state_mutex);
@@ -1410,16 +1731,14 @@
int rc = SLURM_SUCCESS;
bg_record_t* bg_record = NULL;
char buf[256];
- uint16_t conn_type[SYSTEM_DIMENSIONS];
List block_list = NULL;
int blocks_added = 0;
time_t starttime = time(NULL);
uint16_t local_mode = mode;
int avail_cpus = num_unused_cpus;
int dim = 0;
+ select_jobinfo_t *jobinfo = job_ptr->select_jobinfo->data;
- for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
- conn_type[dim] = (uint16_t)NO_VAL;
if (preemptee_candidates && preemptee_job_list
&& list_count(preemptee_candidates))
local_mode |= SELECT_MODE_PREEMPT_FLAG;
@@ -1433,23 +1752,25 @@
block_list = copy_bg_list(bg_lists->main);
slurm_mutex_unlock(&block_state_mutex);
- get_select_jobinfo(job_ptr->select_jobinfo->data,
- SELECT_JOBDATA_CONN_TYPE, &conn_type);
- if (conn_type[0] == SELECT_NAV) {
- if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)
- conn_type[0] = SELECT_SMALL;
- else if (min_nodes > 1) {
- for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
- conn_type[dim] = SELECT_TORUS;
- } else if (job_ptr->details->min_cpus < bg_conf->cpus_per_mp)
- conn_type[0] = SELECT_SMALL;
- else {
+ if (!bg_conf->sub_blocks && (jobinfo->conn_type[0] == SELECT_NAV)) {
+ if (bg_conf->sub_mp_sys) {
+ jobinfo->conn_type[0] = SELECT_SMALL;
for (dim=1; dim<SYSTEM_DIMENSIONS; dim++)
- conn_type[dim] = SELECT_NAV;
+ jobinfo->conn_type[dim] = SELECT_NAV;
+ } else if (!bg_conf->sub_blocks &&
+ (job_ptr->details->min_cpus
+ < bg_conf->cpus_per_mp)) {
+ jobinfo->conn_type[0] = SELECT_SMALL;
+ for (dim=1; dim<SYSTEM_DIMENSIONS; dim++)
+ jobinfo->conn_type[dim] = SELECT_NAV;
+ } else {
+ for (dim=1; dim<SYSTEM_DIMENSIONS; dim++)
+ jobinfo->conn_type[dim] = SELECT_NAV;
}
- set_select_jobinfo(job_ptr->select_jobinfo->data,
- SELECT_JOBDATA_CONN_TYPE,
- &conn_type);
+ } else if (bg_conf->sub_blocks
+ && (job_ptr->details->max_cpus < bg_conf->cpus_per_mp)) {
+ for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
+ jobinfo->conn_type[dim] = SELECT_NAV;
}
if (slurm_block_bitmap && !bit_set_count(slurm_block_bitmap)) {
@@ -1465,7 +1786,7 @@
buf, sizeof(buf),
SELECT_PRINT_MIXED);
- debug("bluegene:submit_job: %u mode=%d %s nodes=%u-%u-%u",
+ debug("bluegene:submit_job: %u mode=%d %s mps=%u-%u-%u",
job_ptr->job_id, local_mode, buf,
min_nodes, req_nodes, max_nodes);
@@ -1522,14 +1843,64 @@
while ((preempt_job_ptr = list_next(job_itr))) {
while ((found_record = list_next(itr))) {
if (found_record->job_ptr == preempt_job_ptr) {
- /* info("removing job %u running on %s", */
- /* preempt_job_ptr->job_id, */
- /* found_record->bg_block_id); */
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK)
+ info("removing job %u running "
+ "on %s",
+ preempt_job_ptr->job_id,
+ found_record->bg_block_id);
found_record->job_ptr = NULL;
found_record->job_running =
NO_JOB_RUNNING;
avail_cpus += found_record->cpu_cnt;
+ found_record->avail_set = false;
break;
+ } else if (found_record->job_list &&
+ list_count(found_record->job_list)) {
+ select_jobinfo_t *found_jobinfo;
+ ba_mp_t *ba_mp;
+ struct job_record *found_job_ptr;
+ ListIterator job_list_itr =
+ list_iterator_create(
+ found_record->job_list);
+ while ((found_job_ptr = list_next(
+ job_list_itr))) {
+ if (found_job_ptr
+ != preempt_job_ptr)
+ continue;
+ found_jobinfo = found_job_ptr->
+ select_jobinfo->data;
+ ba_mp = list_peek(found_record->
+ ba_mp_list);
+
+ xassert(ba_mp);
+ xassert(ba_mp->cnode_bitmap);
+
+ bit_not(found_jobinfo->
+ units_avail);
+ bit_and(ba_mp->cnode_bitmap,
+ found_jobinfo->
+ units_avail);
+ bit_not(found_jobinfo->
+ units_avail);
+
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_BG_PICK)
+ info("removing job %u "
+ "running on %s",
+ preempt_job_ptr->
+ job_id,
+ found_record->
+ bg_block_id);
+ list_delete_item(job_list_itr);
+ avail_cpus += found_job_ptr->
+ total_cpus;
+ found_record->avail_set = false;
+ break;
+ }
+ list_iterator_destroy(job_list_itr);
+ if (found_job_ptr)
+ break;
}
}
if (!found_record) {
@@ -1558,6 +1929,7 @@
}
if (rc == SLURM_SUCCESS) {
+ time_t max_end_time = 0;
if (!bg_record)
fatal("we got a success, but no block back");
/* Here we see if there is a job running since
@@ -1567,14 +1939,52 @@
* past or current time) we add 5 seconds to
* it so we don't use the block immediately.
*/
- if (bg_record->job_ptr
- && bg_record->job_ptr->end_time) {
- if (bg_record->job_ptr->end_time <= starttime)
+ if (bg_record->job_ptr && bg_record->job_ptr->end_time) {
+ max_end_time = bg_record->job_ptr->end_time;
+ } else if (bg_record->job_running == BLOCK_ERROR_STATE)
+ max_end_time = INFINITE;
+ else if (bg_record->job_list
+ && list_count(bg_record->job_list)) {
+ bitstr_t *total_bitmap;
+ bool need_free = false;
+ ba_mp_t *ba_mp = list_peek(bg_record->ba_mp_list);
+ xassert(ba_mp);
+ xassert(ba_mp->cnode_bitmap);
+
+ if (bg_record->err_ratio) {
+ xassert(ba_mp->cnode_err_bitmap);
+ total_bitmap = bit_copy(ba_mp->cnode_bitmap);
+ bit_or(total_bitmap, ba_mp->cnode_err_bitmap);
+ need_free = true;
+ } else
+ total_bitmap = ba_mp->cnode_bitmap;
+ /* Only look at the jobs here if we don't have
+ enough space on the block. jobinfo is set up
+ at the beginning of the function in case
+ you were wondering.
+ */
+ if (jobinfo->cnode_cnt >
+ bit_clear_count(total_bitmap)) {
+ struct job_record *found_job_ptr =
+ _get_last_job(bg_record);
+ max_end_time = found_job_ptr->end_time;
+ }
+ if (need_free)
+ FREE_NULL_BITMAP(total_bitmap);
+ }
+
+ /* If there are any jobs running max_end_time will
+ * be set to something (ie: it won't still be 0)
+ * so in this case, and only this case, we need to
+ * update the value of starttime. Otherwise leave
+ * it as is.
+ */
+ if (max_end_time) {
+ if (max_end_time <= starttime)
starttime += 5;
else
- starttime = bg_record->job_ptr->end_time;
- } else if (bg_record->job_running == BLOCK_ERROR_STATE)
- starttime = INFINITE;
+ starttime = max_end_time;
+ }
/* make sure the job is eligible to run */
if (job_ptr->details->begin_time > starttime)
@@ -1582,29 +1992,33 @@
job_ptr->start_time = starttime;
- set_select_jobinfo(job_ptr->select_jobinfo->data,
+ set_select_jobinfo(jobinfo,
SELECT_JOBDATA_NODES,
bg_record->mp_str);
- set_select_jobinfo(job_ptr->select_jobinfo->data,
- SELECT_JOBDATA_IONODES,
- bg_record->ionode_str);
+ if (!bg_record->job_list)
+ set_select_jobinfo(jobinfo,
+ SELECT_JOBDATA_IONODES,
+ bg_record->ionode_str);
+
if (!bg_record->bg_block_id) {
debug("%d can start unassigned job %u "
"at %ld on %s",
local_mode, job_ptr->job_id,
starttime, bg_record->mp_str);
- set_select_jobinfo(job_ptr->select_jobinfo->data,
+ set_select_jobinfo(jobinfo,
SELECT_JOBDATA_BLOCK_PTR,
NULL);
- set_select_jobinfo(job_ptr->select_jobinfo->data,
- SELECT_JOBDATA_NODE_CNT,
- &bg_record->cnode_cnt);
} else {
- if ((bg_record->ionode_str)
- && (job_ptr->part_ptr->max_share <= 1))
- error("Small block used in "
- "non-shared partition");
+ if (job_ptr->part_ptr
+ && job_ptr->part_ptr->max_share <= 1) {
+ if (bg_record->ionode_str)
+ error("Small block used in a "
+ "non-shared partition");
+ else if (jobinfo->ionode_str)
+ error("Sub-block jobs in a "
+ "non-shared partition");
+ }
debug("%d(%d) can start job %u "
"at %ld on %s(%s) %d",
@@ -1621,14 +2035,48 @@
*/
if (bg_record->original)
bg_record = bg_record->original;
- set_select_jobinfo(
- job_ptr->select_jobinfo->data,
- SELECT_JOBDATA_BLOCK_PTR,
- bg_record);
+ set_select_jobinfo(jobinfo,
+ SELECT_JOBDATA_BLOCK_PTR,
+ bg_record);
+
+ if ((jobinfo->conn_type[0] != SELECT_NAV)
+ && (jobinfo->conn_type[0]
+ < SELECT_SMALL)) {
+ for (dim=0; dim<SYSTEM_DIMENSIONS;
+ dim++)
+ jobinfo->conn_type[dim] =
+ bg_record->conn_type[
+ dim];
+ }
+
+ _build_job_resources_struct(job_ptr,
+ slurm_block_bitmap,
+ bg_record);
if (job_ptr) {
- bg_record->job_running =
- job_ptr->job_id;
- bg_record->job_ptr = job_ptr;
+ if (bg_record->job_list) {
+ /* Mark the ba_mp
+ * cnodes as used now.
+ */
+ select_jobinfo_t *jobinfo =
+ job_ptr->
+ select_jobinfo->data;
+ ba_mp_t *ba_mp = list_peek(
+ bg_record->ba_mp_list);
+ xassert(ba_mp);
+ xassert(ba_mp->cnode_bitmap);
+ bit_or(ba_mp->cnode_bitmap,
+ jobinfo->units_avail);
+ if (!find_job_in_bg_record(
+ bg_record,
+ job_ptr->job_id))
+ list_append(bg_record->
+ job_list,
+ job_ptr);
+ } else {
+ bg_record->job_running =
+ job_ptr->job_id;
+ bg_record->job_ptr = job_ptr;
+ }
job_ptr->job_state |= JOB_CONFIGURING;
last_bg_update = time(NULL);
@@ -1647,21 +2095,18 @@
bg_record->job_ptr = NULL;
bg_record->job_running = NO_JOB_RUNNING;
}
-
+ }
+ if (!bg_conf->sub_blocks || (bg_record->mp_count > 1))
set_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_NODE_CNT,
&bg_record->cnode_cnt);
- }
- if (SELECT_IS_MODE_RUN_NOW(local_mode))
- _build_select_struct(job_ptr,
- slurm_block_bitmap,
- bg_record->cnode_cnt);
+
/* set up the preempted job list */
if (SELECT_IS_PREEMPT_SET(local_mode)) {
if (*preemptee_job_list)
list_destroy(*preemptee_job_list);
*preemptee_job_list = _get_preemptables(
- local_mode, bg_record,
+ local_mode, bg_record, job_ptr,
preemptee_candidates);
}
if (!bg_record->bg_block_id) {
diff --git a/src/plugins/select/bluegene/bg_job_run.c b/src/plugins/select/bluegene/bg_job_run.c
index 5253d24..ffc5f05 100644
--- a/src/plugins/select/bluegene/bg_job_run.c
+++ b/src/plugins/select/bluegene/bg_job_run.c
@@ -67,7 +67,7 @@
bool deleting_old_blocks_flag = 0;
-enum update_op {START_OP, TERM_OP, SYNC_OP};
+enum update_op {START_OP, TERM_OP};
typedef struct {
char *bg_block_id;
@@ -85,14 +85,10 @@
} bg_action_t;
static void _destroy_bg_action(void *x);
-static int _excise_block(List block_list,
- char *bg_block_id,
- char *nodes);
-static List _get_all_allocated_blocks(void);
static void * _block_agent(void *args);
static void _block_op(bg_action_t *bg_action_ptr);
static void _start_agent(bg_action_t *bg_action_ptr);
-static void _sync_agent(bg_action_t *bg_action_ptr);
+static void _sync_agent(bg_action_t *bg_action_ptr, bg_record_t *bg_record);
/* block_state_mutex should be locked before
* calling this function. This should only be called in _start_agent.
@@ -107,11 +103,13 @@
if ((bg_record->magic != BLOCK_MAGIC)
|| !block_ptr_exist_in_list(bg_lists->main, bg_record)) {
slurm_mutex_unlock(&block_state_mutex);
- debug("The block %s disappeared while starting "
- "job %u requeueing if possible.",
- bg_action_ptr->bg_block_id,
- bg_action_ptr->job_ptr->job_id);
- bg_requeue_job(bg_action_ptr->job_ptr->job_id, 1);
+ if (bg_action_ptr->job_ptr) {
+ debug("The block %s disappeared while starting "
+ "job %u requeueing if possible.",
+ bg_action_ptr->bg_block_id,
+ bg_action_ptr->job_ptr->job_id);
+ bg_requeue_job(bg_action_ptr->job_ptr->job_id, 1, 0);
+ }
return 0;
}
return 1;
@@ -133,60 +131,47 @@
}
}
-/* Update block user and reboot as needed */
-static void _sync_agent(bg_action_t *bg_action_ptr)
+/* Update block user and reboot as needed block_state_mutex needs to
+ * be locked before coming in. */
+static void _sync_agent(bg_action_t *bg_action_ptr, bg_record_t *bg_record)
{
- bg_record_t * bg_record = NULL;
+ struct job_record *job_ptr = bg_action_ptr->job_ptr;
- slurm_mutex_lock(&block_state_mutex);
- bg_record = find_bg_record_in_list(bg_lists->main,
- bg_action_ptr->bg_block_id);
- if (!bg_record) {
- slurm_mutex_unlock(&block_state_mutex);
- error("No block %s", bg_action_ptr->bg_block_id);
- bg_requeue_job(bg_action_ptr->job_ptr->job_id, 1);
- return;
- }
+ debug3("Queue sync of job %u in BG block %s ending at %ld",
+ job_ptr->job_id, bg_action_ptr->bg_block_id,
+ job_ptr->end_time);
last_bg_update = time(NULL);
- bg_action_ptr->job_ptr->total_cpus =
- bg_action_ptr->job_ptr->details->min_cpus = bg_record->cpu_cnt;
- bg_record->job_running = bg_action_ptr->job_ptr->job_id;
- bg_record->job_ptr = bg_action_ptr->job_ptr;
- set_select_jobinfo(bg_record->job_ptr->select_jobinfo->data,
+
+ ba_sync_job_to_block(bg_record, job_ptr);
+
+ set_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_BLOCK_PTR,
bg_record);
- if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record)) {
+ num_unused_cpus -= job_ptr->total_cpus;
+
+ if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record))
list_push(bg_lists->job_running, bg_record);
- num_unused_cpus -= bg_record->cpu_cnt;
- }
+
if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
list_push(bg_lists->booted, bg_record);
if (bg_record->state == BG_BLOCK_INITED) {
- if (bg_record->job_ptr) {
- bg_record->job_ptr->job_state &= (~JOB_CONFIGURING);
- last_job_update = time(NULL);
+ int sync_user_rc;
+ job_ptr->job_state &= (~JOB_CONFIGURING);
+ last_job_update = time(NULL);
+ /* Just in case reset the boot flags */
+ bg_record->boot_state = 0;
+ bg_record->boot_count = 0;
+ sync_user_rc = bridge_block_sync_users(bg_record);
+
+ if (sync_user_rc == SLURM_ERROR) {
+ slurm_mutex_unlock(&block_state_mutex);
+ (void) slurm_fail_job(job_ptr->job_id);
+ slurm_mutex_lock(&block_state_mutex);
}
- if (bg_record->user_uid != bg_action_ptr->job_ptr->user_id) {
- int set_user_rc = SLURM_SUCCESS;
-
- debug("User isn't correct for job %d on %s, "
- "fixing...",
- bg_action_ptr->job_ptr->job_id,
- bg_action_ptr->bg_block_id);
- xfree(bg_record->target_name);
- bg_record->target_name =
- uid_to_string(bg_action_ptr->job_ptr->user_id);
- set_user_rc = set_block_user(bg_record);
- slurm_mutex_unlock(&block_state_mutex);
-
- if (set_user_rc == SLURM_ERROR)
- (void) slurm_fail_job(bg_record->job_running);
- } else
- slurm_mutex_unlock(&block_state_mutex);
-
+ _destroy_bg_action(bg_action_ptr);
} else {
if (bg_record->state != BG_BLOCK_BOOTING) {
error("Block %s isn't ready and isn't "
@@ -196,8 +181,9 @@
debug("Block %s is booting, job ok",
bg_action_ptr->bg_block_id);
}
- slurm_mutex_unlock(&block_state_mutex);
- _start_agent(bg_action_ptr);
+		/* the functions called by _block_op will destroy the
+		   bg_action_ptr */
+ _block_op(bg_action_ptr);
}
}
@@ -210,29 +196,34 @@
	ListIterator itr;
	List delete_list = NULL;
	int requeue_job = 0;
+	uint32_t req_job_id = bg_action_ptr->job_ptr->job_id;
+	bool block_inited = 0;
+	bool delete_it = 0;
	slurm_mutex_lock(&block_state_mutex);
	bg_record = find_bg_record_in_list(bg_lists->main,
					   bg_action_ptr->bg_block_id);
	if (!bg_record) {
		slurm_mutex_unlock(&block_state_mutex);
		error("block %s not found in bg_lists->main",
		      bg_action_ptr->bg_block_id);
-		bg_requeue_job(bg_action_ptr->job_ptr->job_id, 1);
+		bg_requeue_job(req_job_id, 1, 0);
		return;
	}
- if (bg_record->job_running <= NO_JOB_RUNNING) {
+ if ((bg_record->job_running <= NO_JOB_RUNNING)
+ && !find_job_in_bg_record(bg_record, req_job_id)) {
+ bg_record->modifying = 0;
// bg_reset_block(bg_record); should already happened
slurm_mutex_unlock(&block_state_mutex);
debug("job %u finished during the queueing job "
"(everything is ok)",
- bg_action_ptr->job_ptr->job_id);
+ req_job_id);
return;
}
- if (bg_record->state == BG_BLOCK_TERM) {
- debug("Block is in Deallocating state, waiting for free.");
+
+ if ((bg_record->state == BG_BLOCK_TERM) || bg_record->free_cnt) {
/* It doesn't appear state of a small block
(conn_type) is held on a BGP system so
if we to reset it so, just set the reboot flag and
@@ -243,7 +235,7 @@
delete_list = list_create(NULL);
itr = list_iterator_create(bg_lists->main);
while ((found_record = list_next(itr))) {
- if ((!found_record) || (bg_record == found_record))
+ if (bg_record == found_record)
continue;
if (!blocks_overlap(bg_record, found_record)) {
@@ -253,16 +245,22 @@
continue;
}
- if (found_record->job_ptr) {
+ if (found_record->job_ptr
+ || (found_record->job_list
+ && list_count(found_record->job_list))) {
+ struct job_record *job_ptr = found_record->job_ptr;
+ if (!found_record->job_ptr)
+ job_ptr = find_job_in_bg_record(
+ found_record, NO_VAL);
error("Trying to start job %u on block %s, "
"but there is a job %u running on an overlapping "
"block %s it will not end until %ld. "
"This should never happen.",
- bg_action_ptr->job_ptr->job_id,
+ req_job_id,
bg_record->bg_block_id,
- found_record->job_ptr->job_id,
+ job_ptr->job_id,
found_record->bg_block_id,
- found_record->job_ptr->end_time);
+ job_ptr->end_time);
requeue_job = 1;
break;
}
@@ -277,42 +275,87 @@
if (requeue_job) {
list_destroy(delete_list);
- bg_reset_block(bg_record);
+ bg_reset_block(bg_record, bg_action_ptr->job_ptr);
+ bg_record->modifying = 0;
slurm_mutex_unlock(&block_state_mutex);
- bg_requeue_job(bg_action_ptr->job_ptr->job_id, 0);
+ bg_requeue_job(req_job_id, 0, 0);
return;
}
slurm_mutex_unlock(&block_state_mutex);
- rc = free_block_list(bg_action_ptr->job_ptr->job_id, delete_list, 0, 1);
+ if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
+ delete_it = 1;
+ rc = free_block_list(req_job_id, delete_list, delete_it, 1);
list_destroy(delete_list);
if (rc != SLURM_SUCCESS) {
error("Problem with deallocating blocks to run job %u "
- "on block %s", bg_action_ptr->job_ptr->job_id,
+ "on block %s", req_job_id,
bg_action_ptr->bg_block_id);
+ slurm_mutex_lock(&block_state_mutex);
+ /* Failure will unlock block_state_mutex so no need to
+ unlock before return. No need to reset modifying
+ here if the block doesn't exist.
+ */
+ if (_make_sure_block_still_exists(bg_action_ptr, bg_record)) {
+ bg_record->modifying = 0;
+ slurm_mutex_unlock(&block_state_mutex);
+ }
+
if (IS_JOB_CONFIGURING(bg_action_ptr->job_ptr))
- bg_requeue_job(bg_action_ptr->job_ptr->job_id, 0);
+ bg_requeue_job(req_job_id, 0, 0);
return;
}
- slurm_mutex_lock(&block_state_mutex);
- /* Failure will unlock block_state_mutex so no need to unlock before
- return. Failure will unlock block_state_mutex so no need to unlock
- before return.
+ while (1) {
+ slurm_mutex_lock(&block_state_mutex);
+ /* Failure will unlock block_state_mutex so no need to
+ unlock before return. No need to reset modifying
+ here if the block doesn't exist.
+ */
+ if (!_make_sure_block_still_exists(bg_action_ptr, bg_record))
+ return;
+ /* If another thread is freeing this block we need to
+ wait until it is done or we will get into a state
+ where this job will be killed.
+ */
+ if (!bg_record->free_cnt)
+ break;
+ debug("Waiting for block %s to free for job %u. "
+ "%d thread(s) trying to free it",
+ bg_record->bg_block_id, req_job_id,
+ bg_record->free_cnt);
+ slurm_mutex_unlock(&block_state_mutex);
+ sleep(1);
+ }
+ /* This was set in the start_job function to close the above
+ window where a job could be mistakenly requeued if another
+ thread is trying to free this block as we are trying to run
+ on it, which is fine since we will reboot it later.
*/
- if (!_make_sure_block_still_exists(bg_action_ptr, bg_record))
- return;
+ bg_record->modifying = 0;
- if (bg_record->job_running <= NO_JOB_RUNNING) {
+ if ((bg_record->job_running <= NO_JOB_RUNNING)
+ && !find_job_in_bg_record(bg_record, req_job_id)) {
// bg_reset_block(bg_record); should already happened
slurm_mutex_unlock(&block_state_mutex);
debug("job %u already finished before boot",
- bg_action_ptr->job_ptr->job_id);
+ req_job_id);
return;
}
+ if (bg_record->job_list
+ && (bg_action_ptr->job_ptr->total_cpus != bg_record->cpu_cnt)
+ && (list_count(bg_record->job_list) != 1)) {
+ /* We don't allow modification of a block or reboot of
+ a block if we are running multiple jobs on the
+ block.
+ */
+ debug2("no reboot");
+ goto no_reboot;
+ }
+
rc = 0;
#ifdef HAVE_BGL
if (bg_action_ptr->blrtsimage
@@ -326,9 +369,16 @@
#elif defined HAVE_BGP
if ((bg_action_ptr->conn_type[0] >= SELECT_SMALL)
&& (bg_action_ptr->conn_type[0] != bg_record->conn_type[0])) {
- debug3("changing small block mode from %s to %s",
- conn_type_string(bg_record->conn_type[0]),
- conn_type_string(bg_action_ptr->conn_type[0]));
+ if (bg_conf->slurm_debug_level >= LOG_LEVEL_DEBUG3) {
+ char *req_conn_type =
+ conn_type_string_full(bg_action_ptr->conn_type);
+ char *conn_type =
+ conn_type_string_full(bg_record->conn_type);
+ debug3("changing small block mode from %s to %s",
+ conn_type, req_conn_type);
+ xfree(req_conn_type);
+ xfree(conn_type);
+ }
rc = 1;
# ifndef HAVE_BG_FILES
/* since we don't check state on an emulated system we
@@ -465,6 +515,7 @@
bg_record->modifying = 0;
}
+no_reboot:
if (bg_record->state == BG_BLOCK_FREE) {
if ((rc = bridge_block_boot(bg_record)) != SLURM_SUCCESS) {
char reason[200];
@@ -501,11 +552,12 @@
}
- if (bg_record->job_running <= NO_JOB_RUNNING) {
+ if ((bg_record->job_running <= NO_JOB_RUNNING)
+ && !find_job_in_bg_record(bg_record, req_job_id)) {
slurm_mutex_unlock(&block_state_mutex);
debug("job %u finished during the start of the boot "
"(everything is ok)",
- bg_action_ptr->job_ptr->job_id);
+ req_job_id);
return;
}
@@ -513,21 +565,28 @@
changes, and needs to outlast a job allocation.
*/
/* bg_record->boot_count = 0; */
- xfree(bg_record->target_name);
- bg_record->target_name = uid_to_string(bg_action_ptr->job_ptr->user_id);
- debug("setting the target_name for Block %s to %s",
- bg_record->bg_block_id, bg_record->target_name);
-
if (bg_record->state == BG_BLOCK_INITED) {
- debug("block %s is ready.", bg_record->bg_block_id);
- set_user_rc = set_block_user(bg_record);
- if (bg_action_ptr->job_ptr) {
- bg_action_ptr->job_ptr->job_state &= (~JOB_CONFIGURING);
- last_job_update = time(NULL);
- }
+ debug("block %s is already ready.", bg_record->bg_block_id);
+ /* Just in case reset the boot flags */
+ bg_record->boot_state = 0;
+ bg_record->boot_count = 0;
+ set_user_rc = bridge_block_sync_users(bg_record);
+ block_inited = 1;
}
slurm_mutex_unlock(&block_state_mutex);
+ /* This lock needs to happen after the block_state_mutex to
+ avoid deadlock.
+ */
+ if (block_inited && bg_action_ptr->job_ptr) {
+ slurmctld_lock_t job_write_lock = {
+ NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
+ lock_slurmctld(job_write_lock);
+ bg_action_ptr->job_ptr->job_state &= (~JOB_CONFIGURING);
+ last_job_update = time(NULL);
+ unlock_slurmctld(job_write_lock);
+ }
+
if (set_user_rc == SLURM_ERROR) {
sleep(2);
/* wait for the slurmd to begin
@@ -535,13 +594,7 @@
is a no-op if issued prior
to the script initiation do clean up just
incase the fail job isn't ran */
- (void) slurm_fail_job(bg_record->job_running);
- slurm_mutex_lock(&block_state_mutex);
- if (remove_from_bg_list(bg_lists->job_running, bg_record)
- == SLURM_SUCCESS)
- num_unused_cpus += bg_record->cpu_cnt;
-
- slurm_mutex_unlock(&block_state_mutex);
+ (void) slurm_fail_job(req_job_id);
}
}
@@ -552,9 +605,8 @@
if (bg_action_ptr->op == START_OP)
_start_agent(bg_action_ptr);
else if (bg_action_ptr->op == TERM_OP)
- bridge_block_post_job(bg_action_ptr->bg_block_id);
- else if (bg_action_ptr->op == SYNC_OP)
- _sync_agent(bg_action_ptr);
+ bridge_block_post_job(bg_action_ptr->bg_block_id,
+ bg_action_ptr->job_ptr);
_destroy_bg_action(bg_action_ptr);
return NULL;
@@ -584,77 +636,6 @@
slurm_attr_destroy(&attr_agent);
}
-
-/* get a list of all BG blocks with users block_state_mutex must be
- * unlocked before entering here. */
-static List _get_all_allocated_blocks(void)
-{
- List ret_list = list_create(destroy_bg_record);
- ListIterator itr;
- bg_record_t *bg_record = NULL;
- bg_record_t *rm_record = NULL;
-
- if (!ret_list)
- fatal("malloc error");
-
- xassert(bg_lists->main);
- slurm_mutex_lock(&block_state_mutex);
- itr = list_iterator_create(bg_lists->main);
- while ((bg_record = list_next(itr))) {
- if ((bg_record->magic != BLOCK_MAGIC) || bg_record->free_cnt
- || !bg_record->user_name || !bg_record->bg_block_id)
- continue;
- rm_record = xmalloc(sizeof(bg_record_t));
- rm_record->magic = BLOCK_MAGIC;
- rm_record->bg_block_id =
- xstrdup(bg_record->bg_block_id);
- rm_record->mp_str = xstrdup(bg_record->mp_str);
- list_append(ret_list, rm_record);
- }
- list_iterator_destroy(itr);
-
- slurm_mutex_unlock(&block_state_mutex);
-
- return ret_list;
-}
-
-/* remove a BG block from the given list */
-static int _excise_block(List block_list, char *bg_block_id,
- char *nodes)
-{
- int rc = SLURM_SUCCESS;
- ListIterator iter;
- bg_record_t *bg_record = NULL;
-
- if (block_list) {
- iter = list_iterator_create(block_list);
- xassert(iter);
- while ((bg_record = list_next(iter))) {
- rc = SLURM_ERROR;
- if (strcmp(bg_record->bg_block_id, bg_block_id))
- continue;
- if (strcmp(bg_record->mp_str, nodes)) {
- /* changed bgblock */
- error("bg_block_id:%s old_nodes:%s "
- "new_nodes:%s",
- bg_block_id, nodes, bg_record->mp_str);
- break;
- }
-
- /* exact match of name and node list */
- debug("synced Block %s", bg_block_id);
- list_delete_item(iter);
- rc = SLURM_SUCCESS;
- break;
- }
- list_iterator_destroy(iter);
- } else {
- error("_excise_block: No block_list");
- rc = SLURM_ERROR;
- }
- return rc;
-}
-
/*
* Perform any work required to terminate a jobs on a block.
* bg_block_id IN - block name
@@ -769,15 +750,26 @@
}
last_bg_update = time(NULL);
- job_ptr->total_cpus = job_ptr->details->min_cpus = bg_record->cpu_cnt;
- bg_record->job_running = bg_action_ptr->job_ptr->job_id;
- bg_record->job_ptr = bg_action_ptr->job_ptr;
- if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record)) {
- list_push(bg_lists->job_running, bg_record);
- num_unused_cpus -= bg_record->cpu_cnt;
+
+ if (bg_record->job_list) {
+ if (!find_job_in_bg_record(bg_record, job_ptr->job_id))
+ list_append(bg_record->job_list, job_ptr);
+ } else {
+ bg_record->job_running = bg_action_ptr->job_ptr->job_id;
+ bg_record->job_ptr = bg_action_ptr->job_ptr;
}
+ num_unused_cpus -= job_ptr->total_cpus;
+
+ if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record))
+ list_push(bg_lists->job_running, bg_record);
+
if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
list_push(bg_lists->booted, bg_record);
+	/* Just in case something happens to free this block before we
+ start the job we will make it so this job doesn't get blown
+ away.
+ */
+ bg_record->modifying = 1;
slurm_mutex_unlock(&block_state_mutex);
info("Queue start of job %u in BG block %s",
@@ -822,11 +814,11 @@
*/
extern int sync_jobs(List job_list)
{
- ListIterator job_iterator;
+ ListIterator itr;
struct job_record *job_ptr = NULL;
- bg_action_t *bg_action_ptr = NULL;
List block_list = NULL;
static bool run_already = false;
+ bg_record_t *bg_record = NULL;
/* Execute only on initial startup. We don't support bgblock
* creation on demand today, so there is no need to re-sync data. */
@@ -838,16 +830,19 @@
error("sync_jobs: no job_list");
return SLURM_ERROR;
}
+ slurm_mutex_lock(&block_state_mutex);
/* Insure that all running jobs own the specified block */
- block_list = _get_all_allocated_blocks();
- job_iterator = list_iterator_create(job_list);
- while ((job_ptr = list_next(job_iterator))) {
- bool good_block = true;
- if (!IS_JOB_RUNNING(job_ptr))
+ itr = list_iterator_create(job_list);
+ while ((job_ptr = list_next(itr))) {
+ bg_action_t *bg_action_ptr = NULL;
+ if (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_COMPLETING(job_ptr))
continue;
bg_action_ptr = xmalloc(sizeof(bg_action_t));
- bg_action_ptr->op = SYNC_OP;
+ if (IS_JOB_COMPLETING(job_ptr))
+ bg_action_ptr->op = TERM_OP;
+ else
+ bg_action_ptr->op = START_OP;
bg_action_ptr->job_ptr = job_ptr;
get_select_jobinfo(job_ptr->select_jobinfo->data,
@@ -877,42 +872,57 @@
if (bg_action_ptr->bg_block_id == NULL) {
error("Running job %u has bgblock==NULL",
job_ptr->job_id);
- good_block = false;
} else if (job_ptr->nodes == NULL) {
error("Running job %u has nodes==NULL",
job_ptr->job_id);
- good_block = false;
- } else if (_excise_block(block_list,
- bg_action_ptr->bg_block_id,
- job_ptr->nodes)
- != SLURM_SUCCESS) {
+ } else if (!(bg_record = find_bg_record_in_list(
+ bg_lists->main,
+ bg_action_ptr->bg_block_id))) {
error("Kill job %u belongs to defunct "
"bgblock %s",
job_ptr->job_id,
bg_action_ptr->bg_block_id);
- good_block = false;
}
- if (!good_block) {
- job_ptr->job_state = JOB_FAILED
- | JOB_COMPLETING;
- job_ptr->end_time = time(NULL);
- last_job_update = time(NULL);
+
+ if (!bg_record) {
+ /* Don't use slurm_fail_job, locks are already
+ in place.
+ */
+ job_fail(job_ptr->job_id);
_destroy_bg_action(bg_action_ptr);
continue;
}
-
- debug3("Queue sync of job %u in BG block %s "
- "ending at %ld",
- job_ptr->job_id,
- bg_action_ptr->bg_block_id,
- job_ptr->end_time);
- _block_op(bg_action_ptr);
+ /* _sync_agent will destroy the bg_action_ptr */
+ _sync_agent(bg_action_ptr, bg_record);
}
- list_iterator_destroy(job_iterator);
+ list_iterator_destroy(itr);
+
+ block_list = list_create(destroy_bg_record);
+ itr = list_iterator_create(bg_lists->main);
+ while ((bg_record = list_next(itr))) {
+ bg_record_t *rm_record;
+ if (bg_record->job_ptr
+ || (bg_record->job_list
+ && list_count(bg_record->job_list)))
+ continue;
+ rm_record = xmalloc(sizeof(bg_record_t));
+ rm_record->magic = BLOCK_MAGIC;
+ rm_record->bg_block_id = xstrdup(bg_record->bg_block_id);
+ rm_record->mp_str = xstrdup(bg_record->mp_str);
+ list_append(block_list, rm_record);
+ }
+ list_iterator_destroy(itr);
+ slurm_mutex_unlock(&block_state_mutex);
/* Insure that all other blocks are free of users */
if (block_list) {
- bridge_reset_block_list(block_list);
+ itr = list_iterator_create(block_list);
+ while ((bg_record = list_next(itr))) {
+ info("Queue clearing of users of BG block %s",
+ bg_record->bg_block_id);
+ term_jobs_on_block(bg_record->bg_block_id);
+ }
+ list_iterator_destroy(itr);
list_destroy(block_list);
} else {
/* this should never happen,
diff --git a/src/plugins/select/bluegene/bg_list_functions.c b/src/plugins/select/bluegene/bg_list_functions.c
index b9b9c58..e3c92df 100644
--- a/src/plugins/select/bluegene/bg_list_functions.c
+++ b/src/plugins/select/bluegene/bg_list_functions.c
@@ -222,3 +222,33 @@
list_iterator_destroy(itr);
return found_record;
}
+
+/* Check to see if a job has been added to the bg_record NO_VAL
+ * returns the first one on the list. */
+extern struct job_record *find_job_in_bg_record(bg_record_t *bg_record,
+ uint32_t job_id)
+{
+ ListIterator itr;
+ struct job_record *job_ptr;
+
+ xassert(bg_record);
+
+ if (!bg_record->job_list)
+ return NULL;
+ itr = list_iterator_create(bg_record->job_list);
+ while ((job_ptr = list_next(itr))) {
+ if (job_ptr->magic != JOB_MAGIC) {
+ error("find_job_in_bg_record: "
+ "bad magic found when "
+ "looking at block %s",
+ bg_record->bg_block_id);
+ list_delete_item(itr);
+ continue;
+ }
+
+ if ((job_ptr->job_id == job_id) || (job_id == NO_VAL))
+ break;
+ }
+ list_iterator_destroy(itr);
+ return job_ptr;
+}
diff --git a/src/plugins/select/bluegene/bg_list_functions.h b/src/plugins/select/bluegene/bg_list_functions.h
index ff4903d..cbe9822 100644
--- a/src/plugins/select/bluegene/bg_list_functions.h
+++ b/src/plugins/select/bluegene/bg_list_functions.h
@@ -60,5 +60,7 @@
extern bg_record_t *find_and_remove_org_from_bg_list(List my_list,
bg_record_t *bg_record);
extern bg_record_t *find_org_in_bg_list(List my_list, bg_record_t *bg_record);
+extern struct job_record *find_job_in_bg_record(bg_record_t *bg_record,
+ uint32_t job_id);
#endif
diff --git a/src/plugins/select/bluegene/bg_node_info.c b/src/plugins/select/bluegene/bg_node_info.c
index 590e81f..0e0fe86 100644
--- a/src/plugins/select/bluegene/bg_node_info.c
+++ b/src/plugins/select/bluegene/bg_node_info.c
@@ -54,6 +54,17 @@
}
}
+static node_subgrp_t *_create_subgrp(List subgrp_list, enum node_states state,
+ uint16_t size)
+{
+ node_subgrp_t *subgrp = xmalloc(sizeof(node_subgrp_t));
+ subgrp->state = state;
+ subgrp->bitmap = bit_alloc(size);
+ list_append(subgrp_list, subgrp);
+
+ return subgrp;
+}
+
static node_subgrp_t *_find_subgrp(List subgrp_list, enum node_states state,
uint16_t size)
{
@@ -66,12 +77,9 @@
break;
}
list_iterator_destroy(itr);
- if (!subgrp) {
- subgrp = xmalloc(sizeof(node_subgrp_t));
- subgrp->state = state;
- subgrp->bitmap = bit_alloc(size);
- list_append(subgrp_list, subgrp);
- }
+
+ if (!subgrp)
+ subgrp = _create_subgrp(subgrp_list, state, size);
return subgrp;
}
@@ -133,7 +141,31 @@
node_subgrp_t *subgrp = NULL;
uint16_t count = 0;
- if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ pack16(nodeinfo->bitmap_size, buffer);
+
+ packstr(nodeinfo->extra_info, buffer);
+ packstr(nodeinfo->failed_cnodes, buffer);
+
+ if (nodeinfo->ba_mp)
+ packstr(nodeinfo->ba_mp->loc, buffer);
+ else
+ packstr(nodeinfo->rack_mp, buffer);
+
+ if (nodeinfo->subgrp_list)
+ count = list_count(nodeinfo->subgrp_list);
+
+ pack16(count, buffer);
+
+ if (count > 0) {
+ itr = list_iterator_create(nodeinfo->subgrp_list);
+ while ((subgrp = list_next(itr))) {
+ _pack_node_subgrp(subgrp, buffer,
+ protocol_version);
+ }
+ list_iterator_destroy(itr);
+ }
+ } else if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
pack16(nodeinfo->bitmap_size, buffer);
if (nodeinfo->subgrp_list)
@@ -150,6 +182,7 @@
list_iterator_destroy(itr);
}
}
+
return SLURM_SUCCESS;
}
@@ -159,8 +192,35 @@
uint16_t size = 0;
select_nodeinfo_t *nodeinfo_ptr = NULL;
uint32_t j = 0;
+ uint32_t uint32_tmp;
- if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpack16(&size, buffer);
+
+ nodeinfo_ptr = select_nodeinfo_alloc((uint32_t)size);
+ *nodeinfo = nodeinfo_ptr;
+
+ safe_unpackstr_xmalloc(&nodeinfo_ptr->extra_info,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&nodeinfo_ptr->failed_cnodes,
+ &uint32_tmp, buffer);
+
+ safe_unpackstr_xmalloc(&nodeinfo_ptr->rack_mp,
+ &uint32_tmp, buffer);
+
+ safe_unpack16(&size, buffer);
+ nodeinfo_ptr->subgrp_list = list_create(_free_node_subgrp);
+ for (j=0; j<size; j++) {
+ node_subgrp_t *subgrp = NULL;
+ if (_unpack_node_subgrp(&subgrp, buffer,
+ nodeinfo_ptr->bitmap_size,
+ protocol_version)
+ != SLURM_SUCCESS)
+ goto unpack_error;
+ list_append(nodeinfo_ptr->subgrp_list, subgrp);
+ }
+ } else if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
safe_unpack16(&size, buffer);
nodeinfo_ptr = select_nodeinfo_alloc((uint32_t)size);
@@ -168,7 +228,7 @@
safe_unpack16(&size, buffer);
nodeinfo_ptr->subgrp_list = list_create(_free_node_subgrp);
- for(j=0; j<size; j++) {
+ for (j=0; j<size; j++) {
node_subgrp_t *subgrp = NULL;
if (_unpack_node_subgrp(&subgrp, buffer,
nodeinfo_ptr->bitmap_size,
@@ -219,6 +279,9 @@
return EINVAL;
}
nodeinfo->magic = 0;
+ xfree(nodeinfo->extra_info);
+ xfree(nodeinfo->failed_cnodes);
+ xfree(nodeinfo->rack_mp);
if (nodeinfo->subgrp_list)
list_destroy(nodeinfo->subgrp_list);
xfree(nodeinfo);
@@ -233,6 +296,10 @@
int i=0;
bg_record_t *bg_record = NULL;
static time_t last_set_all = 0;
+ ba_mp_t *ba_mp;
+ node_subgrp_t *subgrp = NULL;
+ int bit_count;
+
//uint32_t cluster_flags = slurmdb_setup_cluster_flags();
if (!blocks_are_created)
@@ -261,6 +328,7 @@
slurm_mutex_lock(&block_state_mutex);
for (i=0; i<node_record_count; i++) {
select_nodeinfo_t *nodeinfo;
+
node_ptr = &(node_record_table_ptr[i]);
xassert(node_ptr->select_nodeinfo);
nodeinfo = node_ptr->select_nodeinfo->data;
@@ -270,15 +338,47 @@
if (nodeinfo->bitmap_size != g_bitmap_size)
nodeinfo->bitmap_size = g_bitmap_size;
}
+
itr = list_iterator_create(bg_lists->main);
while ((bg_record = list_next(itr))) {
enum node_states state = NODE_STATE_UNKNOWN;
- node_subgrp_t *subgrp = NULL;
select_nodeinfo_t *nodeinfo;
bitstr_t *bitmap;
+ ListIterator itr2 = NULL;
/* Only mark unidle blocks */
- if (bg_record->job_running == NO_JOB_RUNNING)
+ if (bg_record->job_list && list_count(bg_record->job_list)) {
+ struct job_record *job_ptr;
+ select_jobinfo_t *jobinfo;
+ ListIterator itr =
+ list_iterator_create(bg_record->job_list);
+ ba_mp = list_peek(bg_record->ba_mp_list);
+ node_ptr = &(node_record_table_ptr[ba_mp->index]);
+ xassert(node_ptr->select_nodeinfo);
+ nodeinfo = node_ptr->select_nodeinfo->data;
+ xassert(nodeinfo);
+ xassert(nodeinfo->subgrp_list);
+ if (ba_mp->cnode_err_bitmap
+ && (bit_count =
+ bit_set_count(ba_mp->cnode_err_bitmap))) {
+ subgrp = _find_subgrp(nodeinfo->subgrp_list,
+ NODE_STATE_ERROR,
+ g_bitmap_size);
+ /* FIXME: the subgrp->bitmap isn't set here. */
+ subgrp->cnode_cnt += bit_count;
+ }
+
+ subgrp = _find_subgrp(nodeinfo->subgrp_list,
+ NODE_STATE_ALLOCATED,
+ g_bitmap_size);
+ while ((job_ptr = list_next(itr))) {
+ jobinfo = job_ptr->select_jobinfo->data;
+ /* FIXME: the subgrp->bitmap isn't set here. */
+ subgrp->cnode_cnt += jobinfo->cnode_cnt;
+ }
+ list_iterator_destroy(itr);
+ continue;
+ } else if (bg_record->job_running == NO_JOB_RUNNING)
continue;
if (bg_record->state & BG_BLOCK_ERROR_FLAG)
@@ -299,18 +399,31 @@
/* && (state != NODE_STATE_ERROR)) */
/* bitmap = bg_record->cnodes_used_bitmap; */
/* else */
- bitmap = bg_record->ionode_bitmap;
+ bitmap = bg_record->ionode_bitmap;
- for (i=0; i<node_record_count; i++) {
- if (!bit_test(bg_record->mp_bitmap, i))
+ itr2 = list_iterator_create(bg_record->ba_mp_list);
+ while ((ba_mp = list_next(itr2))) {
+ if (!ba_mp->used)
continue;
- node_ptr = &(node_record_table_ptr[i]);
+
+ node_ptr = &(node_record_table_ptr[ba_mp->index]);
xassert(node_ptr->select_nodeinfo);
nodeinfo = node_ptr->select_nodeinfo->data;
xassert(nodeinfo);
xassert(nodeinfo->subgrp_list);
+ if (ba_mp->cnode_err_bitmap
+ && (state == NODE_STATE_ALLOCATED)
+ && (bit_count =
+ bit_set_count(ba_mp->cnode_err_bitmap))) {
+ subgrp = _find_subgrp(nodeinfo->subgrp_list,
+ NODE_STATE_ERROR,
+ g_bitmap_size);
+ /* FIXME: the subgrp->bitmap isn't set here. */
+ subgrp->cnode_cnt += bit_count;
+ }
+
subgrp = _find_subgrp(nodeinfo->subgrp_list,
state, g_bitmap_size);
@@ -333,6 +446,7 @@
}
}
}
+ list_iterator_destroy(itr2);
}
list_iterator_destroy(itr);
slurm_mutex_unlock(&block_state_mutex);
@@ -358,7 +472,7 @@
}
if (nodeinfo->magic != NODEINFO_MAGIC) {
- error("get_nodeinfo: jobinfo magic bad");
+ error("get_nodeinfo: nodeinfo magic bad");
return SLURM_ERROR;
}
@@ -398,6 +512,12 @@
}
list_iterator_destroy(itr);
break;
+ case SELECT_NODEDATA_RACK_MP:
+ if (nodeinfo->ba_mp)
+ *tmp_char = xstrdup(nodeinfo->ba_mp->loc);
+ else if (nodeinfo->rack_mp)
+ *tmp_char = xstrdup(nodeinfo->rack_mp);
+ break;
case SELECT_NODEDATA_STR:
*tmp_char = NULL;
if (!nodeinfo->subgrp_list)
@@ -411,6 +531,13 @@
}
list_iterator_destroy(itr);
break;
+ case SELECT_NODEDATA_EXTRA_INFO:
+ if (nodeinfo->extra_info)
+ *tmp_char = xstrdup(nodeinfo->extra_info);
+ if (nodeinfo->failed_cnodes)
+ xstrfmtcat(*tmp_char, "Failed cnodes=%s",
+ nodeinfo->failed_cnodes);
+ break;
default:
error("Unsupported option %d for get_nodeinfo.", dinfo);
rc = SLURM_ERROR;
diff --git a/src/plugins/select/bluegene/bg_node_info.h b/src/plugins/select/bluegene/bg_node_info.h
index 2cf1912..8fdcd44 100644
--- a/src/plugins/select/bluegene/bg_node_info.h
+++ b/src/plugins/select/bluegene/bg_node_info.h
@@ -55,7 +55,15 @@
struct select_nodeinfo {
ba_mp_t *ba_mp;
uint16_t bitmap_size;
+ char *extra_info; /* Currently used to tell if a cable
+ is in an error state.
+ */
+	char *failed_cnodes; /* Currently used to tell if any cnodes
+				are in a SoftwareFailure state.
+			     */
uint16_t magic; /* magic number */
+ char *rack_mp; /* name of midplane in rack - midplane
+ format */
List subgrp_list;
};
diff --git a/src/plugins/select/bluegene/bg_read_config.c b/src/plugins/select/bluegene/bg_read_config.c
index 833f254..ec35807 100644
--- a/src/plugins/select/bluegene/bg_read_config.c
+++ b/src/plugins/select/bluegene/bg_read_config.c
@@ -47,7 +47,9 @@
#include <stdlib.h>
static s_p_options_t bg_conf_file_options[] = {
-#ifdef HAVE_BGL
+#ifndef HAVE_BG_L_P
+ {"AllowSubBlockAllocations", S_P_BOOLEAN},
+#elif defined HAVE_BGL
{"BlrtsImage", S_P_STRING},
{"LinuxImage", S_P_STRING},
{"RamDiskImage", S_P_STRING},
@@ -60,20 +62,25 @@
{"AltCnloadImage", S_P_ARRAY, parse_image, NULL},
{"AltIoloadImage", S_P_ARRAY, parse_image, NULL},
#endif
+ {"DefaultConnType", S_P_STRING},
{"DenyPassthrough", S_P_STRING},
{"LayoutMode", S_P_STRING},
{"MloaderImage", S_P_STRING},
{"BridgeAPILogFile", S_P_STRING},
{"BridgeAPIVerbose", S_P_UINT16},
{"BasePartitionNodeCnt", S_P_UINT16},
+ {"MidplaneNodeCnt", S_P_UINT16},
{"NodeCardNodeCnt", S_P_UINT16},
+ {"NodeBoardNodeCnt", S_P_UINT16},
{"Numpsets", S_P_UINT16},
{"IONodesPerMP", S_P_UINT16},
+ {"MaxBlockInError", S_P_UINT16},
{"BPs", S_P_ARRAY, parse_blockreq, destroy_select_ba_request},
{"MPs", S_P_ARRAY, parse_blockreq, destroy_select_ba_request},
/* these are just going to be put into a list that will be
freed later don't free them after reading them */
{"AltMloaderImage", S_P_ARRAY, parse_image, NULL},
+ {"SubMidplaneSystem", S_P_BOOLEAN},
{NULL}
};
@@ -162,12 +169,14 @@
{"LinuxImage", S_P_STRING},
{"RamDiskImage", S_P_STRING},
#else
+#ifdef HAVE_BGP
{"16CNBlocks", S_P_UINT16},
- {"64CNBlocks", S_P_UINT16},
- {"256CNBlocks", S_P_UINT16},
{"CnloadImage", S_P_STRING},
{"IoloadImage", S_P_STRING},
#endif
+ {"64CNBlocks", S_P_UINT16},
+ {"256CNBlocks", S_P_UINT16},
+#endif
{"MloaderImage", S_P_STRING},
{NULL}
};
@@ -189,7 +198,7 @@
s_p_get_string(&n->blrtsimage, "BlrtsImage", tbl);
s_p_get_string(&n->linuximage, "LinuxImage", tbl);
s_p_get_string(&n->ramdiskimage, "RamDiskImage", tbl);
-#else
+#elif defined HAVE_BGP
s_p_get_string(&n->linuximage, "CnloadImage", tbl);
s_p_get_string(&n->ramdiskimage, "IoloadImage", tbl);
#endif
@@ -217,7 +226,9 @@
}
#ifndef HAVE_BGL
+#ifdef HAVE_BGP
s_p_get_uint16(&n->small16, "16CNBlocks", tbl);
+#endif
s_p_get_uint16(&n->small64, "64CNBlocks", tbl);
s_p_get_uint16(&n->small256, "256CNBlocks", tbl);
#endif
@@ -232,29 +243,31 @@
}
} else {
if (n->conn_type[0] == (uint16_t)NO_VAL) {
- n->conn_type[0] = SELECT_TORUS;
+ n->conn_type[0] = bg_conf->default_conn_type[0];
} else if (n->conn_type[0] >= SELECT_SMALL) {
error("Block def on midplane(s) %s is given "
"TYPE=%s but isn't asking for any small "
- "blocks. Giving it Torus.",
- n->save_name, conn_type_string(n->conn_type[0]));
- n->conn_type[0] = SELECT_TORUS;
+ "blocks. Giving it %s.",
+ n->save_name, conn_type_string(n->conn_type[0]),
+ conn_type_string(
+ bg_conf->default_conn_type[0]));
+ n->conn_type[0] = bg_conf->default_conn_type[0];
}
#ifndef HAVE_BG_L_P
int i;
- int first_conn_type = n->conn_type[0];
for (i=1; i<SYSTEM_DIMENSIONS; i++) {
if (n->conn_type[i] == (uint16_t)NO_VAL)
- n->conn_type[i] = first_conn_type;
+ n->conn_type[i] = bg_conf->default_conn_type[i];
else if (n->conn_type[i] >= SELECT_SMALL) {
error("Block def on midplane(s) %s dim %d "
"is given TYPE=%s but isn't asking "
"for any small blocks. Giving it %s.",
n->save_name, i,
conn_type_string(n->conn_type[i]),
- conn_type_string(first_conn_type));
- n->conn_type[1] = first_conn_type;
+ conn_type_string(
+ bg_conf->default_conn_type[i]));
+ n->conn_type[i] = bg_conf->default_conn_type[i];
}
}
#endif
@@ -329,9 +342,10 @@
extern int read_bg_conf(void)
{
int i;
+ bool tmp_bool = 0;
int count = 0;
s_p_hashtbl_t *tbl = NULL;
- char *layout = NULL;
+ char *tmp_char = NULL;
select_ba_request_t **blockreq_array = NULL;
image_t **image_array = NULL;
image_t *image = NULL;
@@ -567,26 +581,28 @@
list_push(bg_conf->mloader_list, image);
}
- if (!s_p_get_uint16(
- &bg_conf->mp_cnode_cnt, "BasePartitionNodeCnt", tbl)) {
- error("BasePartitionNodeCnt not configured in bluegene.conf "
- "defaulting to 512 as BasePartitionNodeCnt");
- bg_conf->mp_cnode_cnt = 512;
- bg_conf->quarter_cnode_cnt = 128;
- } else {
- if (bg_conf->mp_cnode_cnt <= 0)
- fatal("You should have more than 0 nodes "
- "per base partition");
-
- bg_conf->quarter_cnode_cnt = bg_conf->mp_cnode_cnt/4;
+ if (!s_p_get_uint16(&bg_conf->mp_cnode_cnt, "MidplaneNodeCnt", tbl)) {
+ if (!s_p_get_uint16(&bg_conf->mp_cnode_cnt,
+ "BasePartitionNodeCnt", tbl)) {
+ error("MidplaneNodeCnt not configured in bluegene.conf "
+ "defaulting to 512 as MidplaneNodeCnt");
+ bg_conf->mp_cnode_cnt = 512;
+ }
}
+
+ if (bg_conf->mp_cnode_cnt <= 0)
+ fatal("You should have more than 0 nodes "
+ "per base partition");
+ bg_conf->actual_cnodes_per_mp = bg_conf->mp_cnode_cnt;
+ bg_conf->quarter_cnode_cnt = bg_conf->mp_cnode_cnt/4;
+
/* bg_conf->cpus_per_mp should had already been set from the
* node_init */
if (bg_conf->cpus_per_mp < bg_conf->mp_cnode_cnt) {
fatal("For some reason we have only %u cpus per mp, but "
"have %u cnodes per mp. You need at least the same "
"number of cpus as you have cnodes per mp. "
- "Check the NodeName Procs= "
+ "Check the NodeName CPUs= "
"definition in the slurm.conf.",
bg_conf->cpus_per_mp, bg_conf->mp_cnode_cnt);
}
@@ -603,41 +619,83 @@
for (i = 0; i<SYSTEM_DIMENSIONS; i++)
num_unused_cpus *= dims[i];
num_unused_cpus *= bg_conf->cpus_per_mp;
+ num_possible_unused_cpus = num_unused_cpus;
- if (!s_p_get_uint16(
- &bg_conf->nodecard_cnode_cnt, "NodeCardNodeCnt", tbl)) {
- error("NodeCardNodeCnt not configured in bluegene.conf "
- "defaulting to 32 as NodeCardNodeCnt");
- bg_conf->nodecard_cnode_cnt = 32;
+ if (!s_p_get_uint16(&bg_conf->nodecard_cnode_cnt,
+ "NodeBoardNodeCnt", tbl)) {
+ if (!s_p_get_uint16(&bg_conf->nodecard_cnode_cnt,
+ "NodeCardNodeCnt", tbl)) {
+ error("NodeCardNodeCnt not configured in bluegene.conf "
+ "defaulting to 32 as NodeCardNodeCnt");
+ bg_conf->nodecard_cnode_cnt = 32;
+ }
}
- if (bg_conf->nodecard_cnode_cnt<=0)
+ if (bg_conf->nodecard_cnode_cnt <= 0)
fatal("You should have more than 0 nodes per nodecard");
bg_conf->mp_nodecard_cnt =
bg_conf->mp_cnode_cnt / bg_conf->nodecard_cnode_cnt;
- if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "Numpsets", tbl))
- fatal("Warning: Numpsets not configured in bluegene.conf");
- if (!bg_conf->ionodes_per_mp) {
- if (!s_p_get_uint16(&bg_conf->ionodes_per_mp,
- "IONodesPerMP", tbl))
+ if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "IONodesPerMP", tbl))
+ if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "Numpsets", tbl))
fatal("Warning: IONodesPerMP not configured "
"in bluegene.conf");
- }
+
+ s_p_get_uint16(&bg_conf->max_block_err, "MaxBlockInError", tbl);
+
+ tmp_bool = 0;
+ s_p_get_boolean(&tmp_bool, "SubMidplaneSystem", tbl);
+ bg_conf->sub_mp_sys = tmp_bool;
#ifdef HAVE_BGQ
+ tmp_bool = 0;
+ s_p_get_boolean(&tmp_bool, "AllowSubBlockAllocations", tbl);
+ bg_conf->sub_blocks = tmp_bool;
+
/* You can only have 16 ionodes per midplane */
if (bg_conf->ionodes_per_mp > bg_conf->mp_nodecard_cnt)
bg_conf->ionodes_per_mp = bg_conf->mp_nodecard_cnt;
#endif
+ for (i=0; i<SYSTEM_DIMENSIONS; i++)
+ bg_conf->default_conn_type[i] = (uint16_t)NO_VAL;
+ s_p_get_string(&tmp_char, "DefaultConnType", tbl);
+ if (tmp_char) {
+ verify_conn_type(tmp_char, bg_conf->default_conn_type);
+ if ((bg_conf->default_conn_type[0] != SELECT_MESH)
+ && (bg_conf->default_conn_type[0] != SELECT_TORUS))
+ fatal("Can't have a DefaultConnType of %s "
+ "(only Mesh or Torus values are valid).",
+ tmp_char);
+ xfree(tmp_char);
+ } else
+ bg_conf->default_conn_type[0] = SELECT_TORUS;
+
+#ifndef HAVE_BG_L_P
+ int first_conn_type = bg_conf->default_conn_type[0];
+ for (i=1; i<SYSTEM_DIMENSIONS; i++) {
+ if (bg_conf->default_conn_type[i] == (uint16_t)NO_VAL)
+ bg_conf->default_conn_type[i] = first_conn_type;
+ else if (bg_conf->default_conn_type[i] >= SELECT_SMALL)
+ fatal("Can't have a DefaultConnType of %s "
+ "(only Mesh or Torus values are valid).",
+ tmp_char);
+ }
+#endif
+
if (bg_conf->ionodes_per_mp) {
bitstr_t *tmp_bitmap = NULL;
int small_size = 1;
- /* THIS IS A HACK TO MAKE A 1 NODECARD SYSTEM WORK */
- if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt) {
+ /* THIS IS A HACK TO MAKE A 1 NODECARD SYSTEM WORK,
+ * Sometime on a Q system the nodecard isn't in the 0
+ * spot so only do this if you know it is in that
+ * spot. Otherwise say the whole midplane is there
+ * and just make blocks over the whole thing. They
+ * you can error out the blocks that aren't usable. */
+ if (bg_conf->sub_mp_sys
+ && bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt) {
#ifdef HAVE_BGQ
bg_conf->quarter_ionode_cnt = 1;
bg_conf->nodecard_ionode_cnt = 1;
@@ -773,58 +831,64 @@
else
_reopen_bridge_log();
- if (s_p_get_string(&layout, "DenyPassthrough", tbl)) {
- if (strstr(layout, "A"))
+ if (s_p_get_string(&tmp_char, "DenyPassthrough", tbl)) {
+ if (strstr(tmp_char, "A"))
ba_deny_pass |= PASS_DENY_A;
- if (strstr(layout, "X"))
+ if (strstr(tmp_char, "X"))
ba_deny_pass |= PASS_DENY_X;
- if (strstr(layout, "Y"))
+ if (strstr(tmp_char, "Y"))
ba_deny_pass |= PASS_DENY_Y;
- if (strstr(layout, "Z"))
+ if (strstr(tmp_char, "Z"))
ba_deny_pass |= PASS_DENY_Z;
- if (!strcasecmp(layout, "ALL"))
+ if (!strcasecmp(tmp_char, "ALL"))
ba_deny_pass |= PASS_DENY_ALL;
bg_conf->deny_pass = ba_deny_pass;
- xfree(layout);
+ xfree(tmp_char);
}
- if (!s_p_get_string(&layout, "LayoutMode", tbl)) {
+ if (!s_p_get_string(&tmp_char, "LayoutMode", tbl)) {
info("Warning: LayoutMode was not specified in bluegene.conf "
"defaulting to STATIC partitioning");
bg_conf->layout_mode = LAYOUT_STATIC;
} else {
- if (!strcasecmp(layout,"STATIC"))
+ if (!strcasecmp(tmp_char,"STATIC"))
bg_conf->layout_mode = LAYOUT_STATIC;
- else if (!strcasecmp(layout,"OVERLAP"))
+ else if (!strcasecmp(tmp_char,"OVERLAP"))
bg_conf->layout_mode = LAYOUT_OVERLAP;
- else if (!strcasecmp(layout,"DYNAMIC"))
+ else if (!strcasecmp(tmp_char,"DYNAMIC"))
bg_conf->layout_mode = LAYOUT_DYNAMIC;
else {
fatal("I don't understand this LayoutMode = %s",
- layout);
+ tmp_char);
}
- xfree(layout);
+ xfree(tmp_char);
}
/* add blocks defined in file */
if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
if (!s_p_get_array((void ***)&blockreq_array,
- &count, "BPs", tbl)) {
- info("WARNING: no blocks defined in bluegene.conf, "
- "only making full system block");
- if (bg_conf->mp_cnode_cnt
- == bg_conf->nodecard_cnode_cnt)
- fatal("On a sub-midplane system you need to "
- "define the blocks you want on your "
- "system.");
- /* create_full_system_block(NULL); */
+ &count, "MPs", tbl)) {
+ if (!s_p_get_array((void ***)&blockreq_array,
+ &count, "BPs", tbl)) {
+ info("WARNING: no blocks defined in "
+ "bluegene.conf, "
+ "only making full system block");
+ /* create_full_system_block(NULL); */
+ if (bg_conf->sub_mp_sys ||
+ (bg_conf->mp_cnode_cnt ==
+ bg_conf->nodecard_cnode_cnt))
+ fatal("On a sub-midplane system you "
+ "need to define the blocks you "
+ "want on your system.");
+ }
}
for (i = 0; i < count; i++) {
add_bg_record(bg_lists->main, NULL,
blockreq_array[i], 0, 0);
}
- } else if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)
+ } else if (bg_conf->sub_mp_sys ||
+ (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt))
/* we can't do dynamic here on a sub-midplane system */
fatal("On a sub-midplane system we can only do OVERLAP or "
"STATIC LayoutMode. Please update your bluegene.conf.");
diff --git a/src/plugins/select/bluegene/bg_record_functions.c b/src/plugins/select/bluegene/bg_record_functions.c
index 64b1045..34738a7 100644
--- a/src/plugins/select/bluegene/bg_record_functions.c
+++ b/src/plugins/select/bluegene/bg_record_functions.c
@@ -50,13 +50,16 @@
static void _addto_mp_list(bg_record_t *bg_record,
uint16_t *start, uint16_t *end);
static int _ba_mp_cmpf_inc(ba_mp_t *node_a, ba_mp_t *node_b);
+static void _set_block_avail(bg_record_t *bg_record);
extern void print_bg_record(bg_record_t* bg_record)
{
+ char *conn_type;
if (!bg_record) {
error("print_bg_record, record given is null");
return;
}
+ conn_type = conn_type_string_full(bg_record->conn_type);
#if _DEBUG
info(" bg_record: ");
if (bg_record->bg_block_id)
@@ -68,7 +71,7 @@
bg_record->cpu_cnt);
info("\tgeo: %ux%ux%u", bg_record->geo[X], bg_record->geo[Y],
bg_record->geo[Z]);
- info("\tconn_type: %s", conn_type_string(bg_record->conn_type[0]));
+ info("\tconn_type: %s", conn_type);
#ifdef HAVE_BGL
info("\tnode_use: %s", node_use_string(bg_record->node_use));
#endif
@@ -83,9 +86,10 @@
format_node_name(bg_record, tmp_char, sizeof(tmp_char));
info("Record: BlockID:%s Nodes:%s Conn:%s",
bg_record->bg_block_id, tmp_char,
- conn_type_string(bg_record->conn_type[0]));
+ conn_type);
}
#endif
+ xfree(conn_type);
}
extern void destroy_bg_record(void *object)
@@ -110,13 +114,10 @@
xfree(bg_record->linuximage);
xfree(bg_record->mloaderimage);
- FREE_NULL_BITMAP(bg_record->mp_bitmap);
xfree(bg_record->mp_str);
- FREE_NULL_BITMAP(bg_record->mp_used_bitmap);
+ FREE_NULL_BITMAP(bg_record->mp_bitmap);
xfree(bg_record->ramdiskimage);
xfree(bg_record->reason);
- xfree(bg_record->target_name);
- xfree(bg_record->user_name);
xfree(bg_record);
}
@@ -232,6 +233,8 @@
list_sort(bg_record->ba_mp_list, (ListCmpF) _ba_mp_cmpf_inc);
+ FREE_NULL_BITMAP(bg_record->mp_bitmap);
+ bg_record->mp_bitmap = bit_alloc(node_record_count);
bg_record->mp_count = 0;
itr = list_iterator_create(bg_record->ba_mp_list);
while ((ba_mp = list_next(itr))) {
@@ -250,6 +253,7 @@
(int16_t)bg_record->start[dim]))
bg_record->start[dim] = ba_mp->coord[dim];
}
+ bit_set(bg_record->mp_bitmap, ba_mp->index);
}
list_iterator_destroy(itr);
if (bg_conf->slurm_debug_level >= LOG_LEVEL_DEBUG3) {
@@ -278,13 +282,6 @@
} else if (bg_record->cnode_cnt == bg_conf->mp_cnode_cnt)
bg_record->full_block = 1;
- FREE_NULL_BITMAP(bg_record->mp_bitmap);
- if (node_name2bitmap(bg_record->mp_str,
- false,
- &bg_record->mp_bitmap)) {
- fatal("process_nodes: Unable to convert nodes %s to bitmap",
- bg_record->mp_str);
- }
return;
}
@@ -304,8 +301,12 @@
error("trying to copy a bad record");
continue;
}
- /* we don't care about blocks being freed */
- if (bg_record->free_cnt)
+ /* we don't care about blocks being destroyed and the
+ * job is gone */
+ if (bg_record->destroy
+ && (!bg_record->job_ptr
+ && (!bg_record->job_list
+ || !list_count(bg_record->job_list))))
continue;
new_record = xmalloc(sizeof(bg_record_t));
@@ -332,6 +333,7 @@
}
xfree(sec_record->bg_block_id);
+ sec_record->action = fir_record->action;
sec_record->bg_block_id = xstrdup(fir_record->bg_block_id);
if (sec_record->ba_mp_list)
@@ -341,6 +343,17 @@
itr = list_iterator_create(fir_record->ba_mp_list);
while ((ba_mp = list_next(itr))) {
new_ba_mp = ba_copy_mp(ba_mp);
+
+ if (ba_mp->cnode_bitmap)
+ new_ba_mp->cnode_bitmap =
+ bit_copy(ba_mp->cnode_bitmap);
+ if (ba_mp->cnode_err_bitmap)
+ new_ba_mp->cnode_err_bitmap =
+ bit_copy(ba_mp->cnode_err_bitmap);
+ if (ba_mp->cnode_usable_bitmap)
+ new_ba_mp->cnode_usable_bitmap =
+ bit_copy(ba_mp->cnode_usable_bitmap);
+
list_append(sec_record->ba_mp_list, new_ba_mp);
}
list_iterator_destroy(itr);
@@ -348,7 +361,7 @@
FREE_NULL_BITMAP(sec_record->mp_bitmap);
if (fir_record->mp_bitmap
- && (sec_record->mp_bitmap = bit_copy(fir_record->mp_bitmap)) == NULL) {
+ && !(sec_record->mp_bitmap = bit_copy(fir_record->mp_bitmap))) {
error("Unable to copy bitmap for %s", fir_record->mp_str);
sec_record->mp_bitmap = NULL;
}
@@ -356,27 +369,25 @@
sec_record->boot_state = fir_record->boot_state;
sec_record->boot_count = fir_record->boot_count;
- FREE_NULL_BITMAP(sec_record->mp_used_bitmap);
- if (fir_record->mp_used_bitmap
- && (sec_record->mp_used_bitmap
- = bit_copy(fir_record->mp_used_bitmap)) == NULL) {
- error("Unable to copy mp_used_bitmap for %s",
- fir_record->mp_str);
- sec_record->mp_used_bitmap = NULL;
- }
sec_record->cnode_cnt = fir_record->cnode_cnt;
+ sec_record->cnode_err_cnt = fir_record->cnode_err_cnt;
memcpy(sec_record->conn_type, fir_record->conn_type,
sizeof(sec_record->conn_type));
sec_record->cpu_cnt = fir_record->cpu_cnt;
+ sec_record->destroy = fir_record->destroy;
+ sec_record->err_ratio = fir_record->err_ratio;
sec_record->free_cnt = fir_record->free_cnt;
sec_record->full_block = fir_record->full_block;
- for(i=0;i<SYSTEM_DIMENSIONS;i++) {
+ for (i=0;i<SYSTEM_DIMENSIONS;i++) {
sec_record->geo[i] = fir_record->geo[i];
sec_record->start[i] = fir_record->start[i];
}
+ for (i=0;i<HIGHEST_DIMENSIONS;i++)
+ sec_record->start_small[i] = fir_record->start_small[i];
+
xfree(sec_record->ionode_str);
sec_record->ionode_str = xstrdup(fir_record->ionode_str);
@@ -389,6 +400,19 @@
sec_record->ionode_bitmap = NULL;
}
+ if (sec_record->job_list) {
+ list_destroy(sec_record->job_list);
+ sec_record->job_list = NULL;
+ }
+
+ if (fir_record->job_list) {
+ struct job_record *job_ptr;
+ sec_record->job_list = list_create(NULL);
+ itr = list_iterator_create(fir_record->job_list);
+ while ((job_ptr = list_next(itr)))
+ list_append(sec_record->job_list, job_ptr);
+ list_iterator_destroy(itr);
+ }
sec_record->job_ptr = fir_record->job_ptr;
sec_record->job_running = fir_record->job_running;
@@ -424,13 +448,6 @@
sec_record->reason = xstrdup(fir_record->reason);
sec_record->state = fir_record->state;
-
- xfree(sec_record->target_name);
- sec_record->target_name = xstrdup(fir_record->target_name);
- xfree(sec_record->user_name);
- sec_record->user_name = xstrdup(fir_record->user_name);
-
- sec_record->user_uid = fir_record->user_uid;
}
/*
@@ -487,109 +504,51 @@
else if ((rec_a->job_running != BLOCK_ERROR_STATE)
&& (rec_b->job_running == BLOCK_ERROR_STATE))
return -1;
- else if (!rec_a->job_ptr && rec_b->job_ptr)
- return -1;
- else if (rec_a->job_ptr && !rec_b->job_ptr)
- return 1;
- else if (rec_a->job_ptr && rec_b->job_ptr) {
- if (rec_a->job_ptr->end_time > rec_b->job_ptr->end_time)
+
+ if (!rec_a->avail_set)
+ _set_block_avail(rec_a);
+
+ if (!rec_b->avail_set)
+ _set_block_avail(rec_b);
+
+ /* Don't use this check below. It will mess up preemption by
+ sending this smaller block to the back of the list just
+ because it is fully used.
+ */
+ /* if (!rec_a->avail_cnode_cnt && rec_b->avail_cnode_cnt) */
+ /* return 1; */
+ /* else if (rec_a->avail_cnode_cnt && !rec_b->avail_cnode_cnt) */
+ /* return -1; */
+
+ if (rec_a->job_list && rec_b->job_list) {
+ /* we only want to use this sort on 1 midplane blocks
+ that are used for sharing
+ */
+ if (rec_a->avail_cnode_cnt > rec_b->avail_cnode_cnt)
return 1;
- else if (rec_a->job_ptr->end_time < rec_b->job_ptr->end_time)
+ else if (rec_a->avail_cnode_cnt < rec_b->avail_cnode_cnt)
return -1;
}
+ if (rec_a->avail_job_end > rec_b->avail_job_end)
+ return 1;
+ else if (rec_a->avail_job_end < rec_b->avail_job_end)
+ return -1;
+
+ /* if (!job_ptr_a && job_ptr_b) */
+ /* return -1; */
+ /* else if (job_ptr_a && !job_ptr_b) */
+ /* return 1; */
+ /* else if (job_ptr_a && job_ptr_b) { */
+ /* if (job_ptr_a->end_time > job_ptr_b->end_time) */
+ /* return 1; */
+ /* else if (job_ptr_a->end_time < job_ptr_b->end_time) */
+ /* return -1; */
+ /* } */
+
return bg_record_cmpf_inc(rec_a, rec_b);
}
-/* All changes to the bg_list target_name must
- be done before this function is called.
- also slurm_conf_lock() must be called before calling this
- function along with slurm_conf_unlock() afterwards.
-*/
-extern int update_block_user(bg_record_t *bg_record, int set)
-{
- int rc=0;
-
- if (!bg_record->target_name) {
- error("Must set target_name to run update_block_user.");
- return -1;
- }
- if (!bg_record->user_name) {
- error("No user_name");
- bg_record->user_name = xstrdup(bg_conf->slurm_user_name);
- }
-
- if (set) {
- if ((rc = bridge_block_remove_all_users(
- bg_record, bg_record->target_name))
- == REMOVE_USER_ERR) {
- error("1 Something happened removing "
- "users from block %s",
- bg_record->bg_block_id);
- return -1;
- } else if (rc == REMOVE_USER_NONE) {
- if (strcmp(bg_record->target_name,
- bg_conf->slurm_user_name)) {
- info("Adding user %s to Block %s",
- bg_record->target_name,
- bg_record->bg_block_id);
-
- if ((rc = bridge_block_add_user(
- bg_record,
- bg_record->target_name))
- != SLURM_SUCCESS) {
- error("bridge_add_block_user"
- "(%s,%s): %s",
- bg_record->bg_block_id,
- bg_record->target_name,
- bg_err_str(rc));
- return -1;
- }
- }
- }
- }
-
- if (strcmp(bg_record->target_name, bg_record->user_name)) {
- uid_t pw_uid;
- xfree(bg_record->user_name);
- bg_record->user_name = xstrdup(bg_record->target_name);
- if (uid_from_string (bg_record->user_name, &pw_uid) < 0) {
- error("No such user: %s", bg_record->user_name);
- return -1;
- } else {
- bg_record->user_uid = pw_uid;
- }
- return 1;
- }
-
- return 0;
-}
-
-extern int set_block_user(bg_record_t *bg_record)
-{
- int rc = 0;
- if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
- info("resetting the boot state flag and "
- "counter for block %s.",
- bg_record->bg_block_id);
- bg_record->boot_state = BG_BLOCK_FREE;
- bg_record->boot_count = 0;
-
- if ((rc = update_block_user(bg_record, 1)) == 1) {
- last_bg_update = time(NULL);
- rc = SLURM_SUCCESS;
- } else if (rc == -1) {
- error("Unable to add user name to block %s. "
- "Cancelling job.",
- bg_record->bg_block_id);
- rc = SLURM_ERROR;
- }
- xfree(bg_record->target_name);
- bg_record->target_name = xstrdup(bg_conf->slurm_user_name);
-
- return rc;
-}
-
/* Try to requeue job running on block and put block in an error state.
* block_state_mutex must be unlocked before calling this.
*/
@@ -603,8 +562,14 @@
}
if (bg_record->job_running > NO_JOB_RUNNING)
- bg_requeue_job(bg_record->job_running, 0);
-
+ bg_requeue_job(bg_record->job_running, 0, 0);
+ else if (bg_record->job_list) {
+ ListIterator itr = list_iterator_create(bg_record->job_list);
+ struct job_record *job_ptr;
+ while ((job_ptr = list_next(itr)))
+ bg_requeue_job(job_ptr->job_id, 0, 0);
+ list_iterator_destroy(itr);
+ }
slurm_mutex_lock(&block_state_mutex);
rc = block_ptr_exist_in_list(bg_lists->main, bg_record);
slurm_mutex_unlock(&block_state_mutex);
@@ -625,8 +590,8 @@
bg_record_t *bg_record = NULL;
ba_mp_t *ba_mp = NULL;
ListIterator itr;
- uid_t pw_uid;
int i, len;
+ char *conn_type = NULL;
xassert(bg_conf->slurm_user_name);
@@ -637,14 +602,6 @@
bg_record->magic = BLOCK_MAGIC;
- bg_record->user_name = xstrdup(bg_conf->slurm_user_name);
- bg_record->target_name = xstrdup(bg_conf->slurm_user_name);
-
- if (uid_from_string (bg_record->user_name, &pw_uid) < 0)
- error("add_bg_record: No such user: %s", bg_record->user_name);
- else
- bg_record->user_uid = pw_uid;
-
if (used_nodes && *used_nodes) {
#ifdef HAVE_BGQ
bg_record->ba_mp_list = *used_nodes;
@@ -661,25 +618,29 @@
/* bg_record->boot_state = 0; Implicit */
bg_record->state = BG_BLOCK_FREE;
-
#ifdef HAVE_BGL
- if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
+ if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
+ conn_type = conn_type_string_full(blockreq->conn_type);
info("add_bg_record: asking for %s %d %d %s",
blockreq->save_name, blockreq->small32, blockreq->small128,
- conn_type_string(blockreq->conn_type[0]));
+ conn_type);
+ xfree(conn_type);
+ }
#else
- if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
+ if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
+ conn_type = conn_type_string_full(blockreq->conn_type);
info("add_bg_record: asking for %s %d %d %d %d %d %s",
blockreq->save_name, blockreq->small256,
blockreq->small128, blockreq->small64,
blockreq->small32, blockreq->small16,
- conn_type_string(blockreq->conn_type[0]));
+ conn_type);
+ xfree(conn_type);
+ }
#endif
/* Set the bitmap blank here if it is a full node we don't
want anything set we also don't want the bg_record->ionode_str set.
*/
bg_record->ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
- bg_record->mp_used_bitmap = bit_alloc(node_record_count);
len = strlen(blockreq->save_name);
i=0;
@@ -737,6 +698,14 @@
bg_record->mloaderimage =
xstrdup(bg_conf->default_mloaderimage);
+#ifdef HAVE_BGQ
+ /* The start is always right, for blocks larger than 1, from
+ the blockreq so don't take chances. */
+ if (bg_record->mp_count > 1)
+ memcpy(bg_record->start, blockreq->start,
+ sizeof(bg_record->start));
+#endif
+
if (bg_record->conn_type[0] < SELECT_SMALL) {
/* this needs to be an append so we keep things in the
order we got them, they will be sorted later */
@@ -749,6 +718,24 @@
"destroying this mp list");
list_destroy(bg_record->ba_mp_list);
bg_record->ba_mp_list = NULL;
+ } else if (bg_conf->sub_blocks && bg_record->mp_count == 1) {
+ ba_mp_t *ba_mp = list_peek(bg_record->ba_mp_list);
+ xassert(ba_mp);
+ /* This will be a list containing jobs running on this
+ block */
+ bg_record->job_list = list_create(NULL);
+
+ /* Create these now so we can deal with error
+ cnodes if/when they happen. Since this is
+ the easiest place to figure it out for
+ blocks that don't use the entire block */
+ if ((ba_mp->cnode_bitmap =
+ ba_create_ba_mp_cnode_bitmap(bg_record))) {
+ ba_mp->cnode_err_bitmap =
+ bit_alloc(bg_conf->mp_cnode_cnt);
+ ba_mp->cnode_usable_bitmap =
+ bit_copy(ba_mp->cnode_bitmap);
+ }
}
} else {
List ba_mp_list = NULL;
@@ -970,23 +957,29 @@
* will call the functions without locking the locks again.
*/
extern int down_nodecard(char *mp_name, bitoff_t io_start,
- bool slurmctld_locked)
+ bool slurmctld_locked, char *reason)
{
List requests = NULL;
- List delete_list = NULL;
+ List delete_list = NULL, pass_list = NULL;
ListIterator itr = NULL;
- bg_record_t *bg_record = NULL, *found_record = NULL, tmp_record;
+ bg_record_t *bg_record = NULL, *found_record = NULL,
+ tmp_record, *error_bg_record = NULL;
bg_record_t *smallest_bg_record = NULL;
struct node_record *node_ptr = NULL;
int mp_bit = 0;
+ bool has_pass = 0;
static int io_cnt = NO_VAL;
static int create_size = NO_VAL;
static select_ba_request_t blockreq;
int rc = SLURM_SUCCESS;
- char *reason = "select_bluegene: nodecard down";
+ slurmctld_lock_t job_write_lock = {
+ NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
xassert(mp_name);
+ if (!reason)
+ reason = "select_bluegene: nodecard down";
+
if (io_cnt == NO_VAL) {
io_cnt = 1;
/* Translate 1 nodecard count to ionode count */
@@ -1037,32 +1030,68 @@
tmp_record.ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
bit_nset(tmp_record.ionode_bitmap, io_start, io_start+io_cnt);
+ /* To avoid deadlock we always must lock the slurmctld before
+ the block_state_mutex.
+ */
+ if (!slurmctld_locked)
+ lock_slurmctld(job_write_lock);
slurm_mutex_lock(&block_state_mutex);
itr = list_iterator_create(bg_lists->main);
while ((bg_record = list_next(itr))) {
- if (!bit_test(bg_record->mp_bitmap, mp_bit))
+ if (bg_record->destroy)
continue;
- if (!blocks_overlap(bg_record, &tmp_record))
+ if (!bit_test(bg_record->mp_bitmap, mp_bit)
+#ifndef HAVE_BG_L_P
+ /* In BGQ if a nodeboard goes down you can no
+ longer use any block using that nodeboard in a
+ passthrough, so we need to remove it.
+ */
+ && !(has_pass = block_mp_passthrough(bg_record, mp_bit))
+#endif
+ )
+ continue;
+
+ if (!has_pass && !blocks_overlap(bg_record, &tmp_record))
continue;
if (bg_record->job_running > NO_JOB_RUNNING) {
- if (slurmctld_locked)
- job_fail(bg_record->job_running);
- else
- slurm_fail_job(bg_record->job_running);
-
+ job_fail(bg_record->job_running);
+ } else if (bg_record->job_list) {
+ ListIterator job_itr = list_iterator_create(
+ bg_record->job_list);
+ struct job_record *job_ptr;
+ while ((job_ptr = list_next(job_itr))) {
+ job_fail(job_ptr->job_id);
+ }
+ list_iterator_destroy(job_itr);
}
/* If Running Dynamic mode and the block is
smaller than the create size just continue on.
*/
- if ((bg_conf->layout_mode == LAYOUT_DYNAMIC)
- && (bg_record->cnode_cnt < create_size)) {
- if (!delete_list)
- delete_list = list_create(NULL);
- list_append(delete_list, bg_record);
+ if (bg_conf->layout_mode == LAYOUT_DYNAMIC) {
+ if (bg_record->cnode_cnt < create_size) {
+ if (!delete_list)
+ delete_list = list_create(NULL);
+ list_append(delete_list, bg_record);
+ continue;
+ } else if (has_pass) {
+ /* Set it up so the passthrough blocks
+ get removed since they are no
+ longer valid.
+ */
+ if (!pass_list)
+ pass_list = list_create(NULL);
+ list_append(pass_list, bg_record);
+ continue;
+ }
+ } else if (has_pass) /* on non-dynamic systems this
+ block doesn't really mean
+ anything we just needed to
+ fail the job (which was
+ probably already failed).
+ */
continue;
- }
/* keep track of the smallest size that is at least
the size of create_size. */
@@ -1071,14 +1100,18 @@
smallest_bg_record = bg_record;
}
list_iterator_destroy(itr);
- slurm_mutex_unlock(&block_state_mutex);
+
+ /* We cannot unlock block_state_mutex here until we are done
+ * with smallest_bg_record.
+ */
if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
debug3("running non-dynamic mode");
/* This should never happen, but just in case... */
- if (delete_list)
+ if (delete_list) {
list_destroy(delete_list);
-
+ delete_list = NULL;
+ }
/* If we found a block that is smaller or equal to a
midplane we will just mark it in an error state as
opposed to draining the node.
@@ -1087,24 +1120,23 @@
&& (smallest_bg_record->cnode_cnt < bg_conf->mp_cnode_cnt)){
if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
rc = SLURM_NO_CHANGE_IN_DATA;
+ slurm_mutex_unlock(&block_state_mutex);
goto cleanup;
}
- rc = put_block_in_error_state(
- smallest_bg_record, reason);
+ slurm_mutex_unlock(&block_state_mutex);
+ error_bg_record = smallest_bg_record;
goto cleanup;
}
+ slurm_mutex_unlock(&block_state_mutex);
debug("No block under 1 midplane available for this nodecard. "
"Draining the whole node.");
- if (!node_already_down(mp_name)) {
- if (slurmctld_locked)
- drain_nodes(mp_name, reason,
- slurm_get_slurm_user_id());
- else
- slurm_drain_nodes(mp_name, reason,
- slurm_get_slurm_user_id());
- }
+
+ /* the slurmctld is always locked here */
+ if (!node_already_down(mp_name))
+ drain_nodes(mp_name, reason,
+ slurm_get_slurm_user_id());
rc = SLURM_SUCCESS;
goto cleanup;
}
@@ -1114,25 +1146,22 @@
if (delete_list) {
int cnt_set = 0;
bitstr_t *iobitmap = bit_alloc(bg_conf->ionodes_per_mp);
- /* don't lock here since it is handled inside
- the put_block_in_error_state
- */
itr = list_iterator_create(delete_list);
while ((bg_record = list_next(itr))) {
debug2("combining smaller than nodecard "
"dynamic block %s",
bg_record->bg_block_id);
- while (bg_record->job_running > NO_JOB_RUNNING)
- sleep(1);
-
bit_or(iobitmap, bg_record->ionode_bitmap);
cnt_set++;
}
list_iterator_destroy(itr);
list_destroy(delete_list);
+ delete_list = NULL;
+
if (!cnt_set) {
FREE_NULL_BITMAP(iobitmap);
rc = SLURM_ERROR;
+ slurm_mutex_unlock(&block_state_mutex);
goto cleanup;
}
/* set the start to be the same as the start of the
@@ -1154,27 +1183,26 @@
} else if (smallest_bg_record) {
debug2("smallest dynamic block is %s",
smallest_bg_record->bg_block_id);
- if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
- rc = SLURM_NO_CHANGE_IN_DATA;
- goto cleanup;
- }
-
- while (smallest_bg_record->job_running > NO_JOB_RUNNING)
- sleep(1);
if (smallest_bg_record->cnode_cnt == create_size) {
- rc = put_block_in_error_state(
- smallest_bg_record, reason);
+ slurm_mutex_unlock(&block_state_mutex);
+ error_bg_record = smallest_bg_record;
goto cleanup;
}
+ /* If the block is bigger than the asked for error we
+ need to resume it to keep accounting correct.
+ */
+ if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG)
+ resume_block(smallest_bg_record);
+
if (create_size > smallest_bg_record->cnode_cnt) {
/* we should never get here. This means we
* have a create_size that is bigger than a
* block that is already made.
*/
- rc = put_block_in_error_state(
- smallest_bg_record, reason);
+ slurm_mutex_unlock(&block_state_mutex);
+ error_bg_record = smallest_bg_record;
goto cleanup;
}
debug3("node count is %d", smallest_bg_record->cnode_cnt);
@@ -1199,9 +1227,10 @@
if (create_size != bg_conf->nodecard_cnode_cnt) {
blockreq.small128 = blockreq.small32 / 4;
blockreq.small32 = 0;
- io_start = 0;
- } else if ((io_start =
- bit_ffs(smallest_bg_record->ionode_bitmap)) == -1)
+ }
+
+ if ((io_start =
+ bit_ffs(smallest_bg_record->ionode_bitmap)) == -1)
/* set the start to be the same as the start of the
ionode_bitmap. If no ionodes set (not a small
block) set io_start = 0. */
@@ -1222,16 +1251,11 @@
blockreq.small128 = 4;
break;
case 512:
- if (!node_already_down(mp_name)) {
- char *reason = "select_bluegene: nodecard down";
- if (slurmctld_locked)
- drain_nodes(mp_name, reason,
- slurm_get_slurm_user_id());
- else
- slurm_drain_nodes(
- mp_name, reason,
- slurm_get_slurm_user_id());
- }
+ slurm_mutex_unlock(&block_state_mutex);
+ /* the slurmctld is always locked here */
+ if (!node_already_down(mp_name))
+ drain_nodes(mp_name, reason,
+ slurm_get_slurm_user_id());
rc = SLURM_SUCCESS;
goto cleanup;
break;
@@ -1246,7 +1270,6 @@
smallest block that takes up the entire midplane. */
}
-
/* Here we need to add blocks that take up nodecards on this
midplane. Since Slurm only keeps track of midplanes
natively this is the only want to handle this case.
@@ -1254,15 +1277,61 @@
requests = list_create(destroy_bg_record);
add_bg_record(requests, NULL, &blockreq, 1, io_start);
- slurm_mutex_lock(&block_state_mutex);
- delete_list = list_create(NULL);
+ if (bg_conf->sub_blocks
+ && (!smallest_bg_record
+ || smallest_bg_record->cnode_cnt == bg_conf->mp_cnode_cnt)) {
+ bg_record_t *rem_record = NULL;
+ memset(&blockreq, 0, sizeof(select_ba_request_t));
+ blockreq.conn_type[0] = SELECT_SMALL;
+ blockreq.save_name = mp_name;
+ blockreq.small256 = 2;
+ add_bg_record(requests, NULL, &blockreq, 1, io_start);
+
+ itr = list_iterator_create(requests);
+ while ((bg_record = list_next(itr))) {
+ if (bit_overlap(bg_record->ionode_bitmap,
+ tmp_record.ionode_bitmap)) {
+ if (bg_record->cnode_cnt == 256) {
+ print_bg_record(bg_record);
+ rem_record = bg_record;
+ list_remove(itr);
+ break;
+ }
+ }
+ }
+ if (!rem_record) {
+ /* this should never happen */
+ error("down_nodecard: something bad happened "
+ "with creation of 256 block");
+ } else {
+ list_iterator_reset(itr);
+ while ((bg_record = list_next(itr))) {
+ if (bg_record->cnode_cnt == 256)
+ continue;
+ if (!bit_overlap(bg_record->ionode_bitmap,
+ rem_record->ionode_bitmap)) {
+ print_bg_record(bg_record);
+ list_delete_item(itr);
+ }
+ }
+ destroy_bg_record(rem_record);
+ }
+ list_iterator_destroy(itr);
+ }
+
+ if (pass_list) {
+ delete_list = pass_list;
+ pass_list = NULL;
+ } else
+ delete_list = list_create(NULL);
while ((bg_record = list_pop(requests))) {
itr = list_iterator_create(bg_lists->main);
while ((found_record = list_next(itr))) {
+ if (found_record->destroy)
+ continue;
if (!blocks_overlap(bg_record, found_record))
continue;
list_push(delete_list, found_record);
- list_remove(itr);
}
list_iterator_destroy(itr);
@@ -1283,26 +1352,44 @@
tmp_record.ionode_bitmap)) {
/* here we know the error block doesn't exist
so just set the state here */
- slurm_mutex_unlock(&block_state_mutex);
- rc = put_block_in_error_state(bg_record, reason);
- slurm_mutex_lock(&block_state_mutex);
+ error_bg_record = bg_record;
}
}
list_destroy(requests);
- if (delete_list) {
- slurm_mutex_unlock(&block_state_mutex);
- free_block_list(NO_VAL, delete_list, 0, 0);
- list_destroy(delete_list);
- }
- slurm_mutex_lock(&block_state_mutex);
sort_bg_record_inc_size(bg_lists->main);
- slurm_mutex_unlock(&block_state_mutex);
last_bg_update = time(NULL);
+ slurm_mutex_unlock(&block_state_mutex);
cleanup:
+ if (!slurmctld_locked)
+ unlock_slurmctld(job_write_lock);
FREE_NULL_BITMAP(tmp_record.mp_bitmap);
FREE_NULL_BITMAP(tmp_record.ionode_bitmap);
+ if (error_bg_record) {
+ /* all locks must be released before going into
+ * put_block_in_error_state.
+ */
+ if (slurmctld_locked)
+ unlock_slurmctld(job_write_lock);
+ rc = put_block_in_error_state(error_bg_record, reason);
+ if (slurmctld_locked)
+ lock_slurmctld(job_write_lock);
+ }
+
+ if (pass_list) {
+ delete_list = pass_list;
+ pass_list = NULL;
+ }
+
+ if (delete_list) {
+ bool delete_it = 0;
+ if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
+ delete_it = 1;
+ free_block_list(NO_VAL, delete_list, delete_it, 0);
+ list_destroy(delete_list);
+ delete_list = NULL;
+ }
return rc;
@@ -1361,8 +1448,6 @@
/* block_state_mutex must be unlocked before calling this. */
extern int put_block_in_error_state(bg_record_t *bg_record, char *reason)
{
- uid_t pw_uid;
-
xassert(bg_record);
/* Only check this if the blocks are created, meaning this
@@ -1373,15 +1458,18 @@
to wait for the job to be removed. We don't really
need to free the block though since we may just
want it to be in an error state for some reason. */
- while (bg_record->job_running > NO_JOB_RUNNING) {
- if (bg_record->magic != BLOCK_MAGIC) {
- error("While putting block %s in a error "
- "state it was destroyed",
- bg_record->bg_block_id);
- return SLURM_ERROR;
- }
- debug2("block %s is still running job %d",
- bg_record->bg_block_id, bg_record->job_running);
+ while ((bg_record->magic == BLOCK_MAGIC)
+ && ((bg_record->job_running > NO_JOB_RUNNING)
+ || (bg_record->job_list
+ && list_count(bg_record->job_list)))) {
+ if (bg_record->job_running > NO_JOB_RUNNING)
+ debug2("block %s is still running job %d",
+ bg_record->bg_block_id,
+ bg_record->job_running);
+ else
+ debug2("block %s is still running jobs",
+ bg_record->bg_block_id);
+
sleep(1);
}
}
@@ -1399,23 +1487,19 @@
if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record)) {
list_push(bg_lists->job_running, bg_record);
num_unused_cpus -= bg_record->cpu_cnt;
+ } else if (!(bg_record->state & BG_BLOCK_ERROR_FLAG)) {
+ info("hey I was in the job_running table %d %d %s?",
+ list_count(bg_record->job_list), num_unused_cpus,
+ bg_block_state_string(bg_record->state));
+ xassert(0);
}
+
if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
list_push(bg_lists->booted, bg_record);
bg_record->job_running = BLOCK_ERROR_STATE;
bg_record->state |= BG_BLOCK_ERROR_FLAG;
- xfree(bg_record->user_name);
- xfree(bg_record->target_name);
- bg_record->user_name = xstrdup(bg_conf->slurm_user_name);
- bg_record->target_name = xstrdup(bg_conf->slurm_user_name);
-
- if (uid_from_string (bg_record->user_name, &pw_uid) < 0)
- error("No such user: %s", bg_record->user_name);
- else
- bg_record->user_uid = pw_uid;
-
/* Only send if reason is set. If it isn't set then
accounting should already know about this error state */
if (reason) {
@@ -1438,14 +1522,29 @@
{
xassert(bg_record);
- if (bg_record->job_running > NO_JOB_RUNNING)
+ if (bg_record->job_running > NO_JOB_RUNNING
+ || (bg_record->job_list && list_count(bg_record->job_list)))
return SLURM_SUCCESS;
if (bg_record->state & BG_BLOCK_ERROR_FLAG) {
+ ba_mp_t *ba_mp;
+ ListIterator itr;
+ struct node_record *node_ptr;
+
bg_record->state &= (~BG_BLOCK_ERROR_FLAG);
info("Block %s put back into service after "
"being in an error state.",
bg_record->bg_block_id);
+
+ /* Remove the block error message from each slurm node. */
+ itr = list_iterator_create(bg_record->ba_mp_list);
+ while ((ba_mp = list_next(itr))) {
+ node_ptr = &node_record_table_ptr[ba_mp->index];
+ if (node_ptr->reason
+ && !strncmp(node_ptr->reason, "update_block", 12))
+ xfree(node_ptr->reason);
+ }
+ list_iterator_destroy(itr);
}
if (remove_from_bg_list(bg_lists->job_running, bg_record)
@@ -1467,52 +1566,96 @@
}
/* block_state_mutex should be locked before calling this function */
-extern int bg_reset_block(bg_record_t *bg_record)
+extern int bg_reset_block(bg_record_t *bg_record, struct job_record *job_ptr)
{
int rc = SLURM_SUCCESS;
- if (bg_record) {
- if (bg_record->job_running > NO_JOB_RUNNING) {
- bg_record->job_running = NO_JOB_RUNNING;
- bg_record->job_ptr = NULL;
- }
- /* remove user from list */
- if (bg_record->target_name) {
- if (strcmp(bg_record->target_name,
- bg_conf->slurm_user_name)) {
- xfree(bg_record->target_name);
- bg_record->target_name =
- xstrdup(bg_conf->slurm_user_name);
- }
- update_block_user(bg_record, 1);
- } else {
- bg_record->target_name =
- xstrdup(bg_conf->slurm_user_name);
- }
+ if (!bg_record) {
+ error("bg_reset_block: No block given to reset");
+ return SLURM_ERROR;
+ }
+ if (bg_record->job_list)
+ ba_remove_job_in_block_job_list(bg_record, job_ptr);
- /* Don't reset these (boot_(state/count)), they will be
- reset when state changes, and needs to outlast a job
- allocation.
+ if ((bg_record->job_running > NO_JOB_RUNNING)
+ && (!bg_record->job_list || !list_count(bg_record->job_list))) {
+#ifndef HAVE_BG_L_P
+ /* Just in case the slurmctld wasn't up at the
+ time a step completion message came through
+ we will clear all the cnode_bitmaps of the
+ midplanes of this block. So we can use
+ those cnodes on the next job that uses this
+ block.
*/
- /* bg_record->boot_state = 0; */
- /* bg_record->boot_count = 0; */
+ ba_mp_t *ba_mp = NULL;
+ ListIterator itr = list_iterator_create(bg_record->ba_mp_list);
+ while ((ba_mp = list_next(itr))) {
+ if (!ba_mp->used)
+ continue;
+ if (ba_mp->cnode_usable_bitmap) {
+ FREE_NULL_BITMAP(ba_mp->cnode_bitmap);
+ ba_mp->cnode_bitmap =
+ bit_copy(ba_mp->cnode_usable_bitmap);
+ } else if (ba_mp->cnode_bitmap)
+ bit_nclear(ba_mp->cnode_bitmap, 0,
+ bit_size(ba_mp->cnode_bitmap)-1);
+ }
+ list_iterator_destroy(itr);
+#endif
+ bg_record->job_running = NO_JOB_RUNNING;
+ }
- last_bg_update = time(NULL);
- /* Only remove from the job_running list if
- job_running == NO_JOB_RUNNING, since blocks in
- error state could also be in this list and we don't
- want to remove them.
+ if (bg_record->job_ptr) {
+ num_unused_cpus += bg_record->job_ptr->total_cpus;
+ bg_record->job_ptr = NULL;
+ }
+
+ /* remove user from list */
+ bridge_block_sync_users(bg_record);
+
+ /* Don't reset these (boot_(state/count)), they will be
+ reset when state changes, and needs to outlast a job
+ allocation.
+ */
+ /* bg_record->boot_state = 0; */
+ /* bg_record->boot_count = 0; */
+
+ last_bg_update = time(NULL);
+
+ /* Only remove from the job_running list if
+ job_running == NO_JOB_RUNNING, since blocks in
+ error state could also be in this list and we don't
+ want to remove them.
+ */
+ if (bg_record->job_running == NO_JOB_RUNNING
+ && (!bg_record->job_list || !list_count(bg_record->job_list))) {
+ remove_from_bg_list(bg_lists->job_running, bg_record);
+
+ /* At this point, no job is running on the block
+ anymore, so if there are any errors on it, free it
+ now.
*/
- if (bg_record->job_running == NO_JOB_RUNNING)
- if (remove_from_bg_list(bg_lists->job_running,
- bg_record)
- == SLURM_SUCCESS) {
- num_unused_cpus += bg_record->cpu_cnt;
- }
- } else {
- error("No block given to reset");
- rc = SLURM_ERROR;
+ if (bg_record->cnode_err_cnt) {
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_SELECT_TYPE)
+ info("%s has %d in error",
+ bg_record->bg_block_id,
+ bg_record->cnode_err_cnt);
+ bg_free_block(bg_record, 0, 1);
+ }
+ }
+
+ if (!list_count(bg_lists->job_running)
+ && (num_unused_cpus != num_possible_unused_cpus)) {
+ /* This should never happen, but if it does reset the
+ num_unused_cpus and go on your way.
+ */
+ error("Hey we are here with no jobs and we have only "
+ "%d usuable cpus. We should have %d!",
+ num_unused_cpus, num_possible_unused_cpus);
+ //xassert(0);
+ num_unused_cpus = num_possible_unused_cpus;
}
return rc;
@@ -1624,7 +1767,11 @@
}
return;
}
+
+ slurm_mutex_lock(&ba_system_mutex);
curr_mp = ba_copy_mp(coord2ba_mp(coords));
+ slurm_mutex_unlock(&ba_system_mutex);
+
if (curr_mp) {
curr_mp->used = 1;
list_append(my_list, curr_mp);
@@ -1708,4 +1855,30 @@
return rc;
}
+static void _set_block_avail(bg_record_t *bg_record)
+{
+ bg_record->avail_set = true;
+ if (bg_record->job_ptr) {
+ bg_record->avail_cnode_cnt = 0;
+ bg_record->avail_job_end = bg_record->job_ptr->end_time;
+ } else if (bg_record->job_list) {
+ struct job_record *job_ptr;
+ ListIterator itr =
+ list_iterator_create(bg_record->job_list);
+
+ bg_record->avail_cnode_cnt = bg_record->cnode_cnt;
+ while ((job_ptr = list_next(itr))) {
+ select_jobinfo_t *jobinfo =
+ job_ptr->select_jobinfo->data;
+ if (job_ptr->end_time > bg_record->avail_job_end)
+ bg_record->avail_job_end =
+ job_ptr->end_time;
+ bg_record->avail_cnode_cnt -= jobinfo->cnode_cnt;
+ }
+ list_iterator_destroy(itr);
+ } else {
+ bg_record->avail_cnode_cnt = bg_record->cnode_cnt;
+ bg_record->avail_job_end = 0;
+ }
+}
diff --git a/src/plugins/select/bluegene/bg_record_functions.h b/src/plugins/select/bluegene/bg_record_functions.h
index 867be5e..335da07 100644
--- a/src/plugins/select/bluegene/bg_record_functions.h
+++ b/src/plugins/select/bluegene/bg_record_functions.h
@@ -70,8 +70,6 @@
/* change username of a block bg_record_t target_name needs to be
updated before call of function.
*/
-extern int set_block_user(bg_record_t *bg_record);
-extern int update_block_user(bg_record_t *bg_block_id, int set);
extern void requeue_and_error(bg_record_t *bg_record, char *reason);
extern int add_bg_record(List records, List *used_nodes, select_ba_request_t *blockreq,
@@ -81,10 +79,10 @@
extern int format_node_name(bg_record_t *bg_record, char *buf, int buf_size);
extern int down_nodecard(char *bp_name, bitoff_t io_start,
- bool slurmctld_locked);
+ bool slurmctld_locked, char *reason);
extern int up_nodecard(char *bp_name, bitstr_t *ionode_bitmap);
extern int put_block_in_error_state(bg_record_t *bg_record, char *reason);
extern int resume_block(bg_record_t *bg_record);
-extern int bg_reset_block(bg_record_t *bg_record);
+extern int bg_reset_block(bg_record_t *bg_record, struct job_record *job_ptr);
#endif /* _BLUEGENE_BG_RECORD_FUNCTIONS_H_ */
diff --git a/src/plugins/select/bluegene/bg_status.c b/src/plugins/select/bluegene/bg_status.c
index 3f4429e..8226572 100644
--- a/src/plugins/select/bluegene/bg_status.c
+++ b/src/plugins/select/bluegene/bg_status.c
@@ -43,10 +43,6 @@
#define RETRY_BOOT_COUNT 3
-typedef struct {
- int jobid;
-} kill_job_struct_t;
-
static void _destroy_kill_struct(void *object);
static void _destroy_kill_struct(void *object)
@@ -61,33 +57,16 @@
static int _block_is_deallocating(bg_record_t *bg_record, List kill_job_list)
{
int jobid = bg_record->job_running;
- char *user_name = NULL;
if (bg_record->modifying)
return SLURM_SUCCESS;
- user_name = xstrdup(bg_conf->slurm_user_name);
- if (bridge_block_remove_all_users(bg_record, NULL) == REMOVE_USER_ERR) {
- error("Something happened removing users from block %s",
- bg_record->bg_block_id);
- }
-
- if (!bg_record->target_name) {
- error("Target Name was not set for block %s.",
- bg_record->bg_block_id);
- bg_record->target_name = xstrdup(bg_record->user_name);
- }
-
- if (!bg_record->user_name) {
- error("User Name was not set for block %s.",
- bg_record->bg_block_id);
- bg_record->user_name = xstrdup(user_name);
- }
-
if (bg_record->boot_state) {
error("State went to free on a boot for block %s.",
bg_record->bg_block_id);
- } else if (jobid > NO_JOB_RUNNING) {
+ } else if (bg_record->job_ptr && (jobid > NO_JOB_RUNNING)) {
+ select_jobinfo_t *jobinfo =
+ bg_record->job_ptr->select_jobinfo->data;
if (kill_job_list) {
kill_job_struct_t *freeit =
(kill_job_struct_t *)
@@ -95,26 +74,49 @@
freeit->jobid = jobid;
list_push(kill_job_list, freeit);
}
-
error("Block %s was in a ready state "
"for user %s but is being freed. "
"Job %d was lost.",
bg_record->bg_block_id,
- bg_record->user_name,
+ jobinfo->user_name,
jobid);
+ } else if (bg_record->job_list && list_count(bg_record->job_list)) {
+ struct job_record *job_ptr;
+ ListIterator itr = list_iterator_create(bg_record->job_list);
+ while ((job_ptr = list_next(itr))) {
+ select_jobinfo_t *jobinfo;
+
+ if (job_ptr->magic != JOB_MAGIC)
+ continue;
+
+ jobinfo = job_ptr->select_jobinfo->data;
+ if (kill_job_list) {
+ kill_job_struct_t *freeit =
+ (kill_job_struct_t *)
+ xmalloc(sizeof(freeit));
+ freeit->jobid = job_ptr->job_id;
+ list_push(kill_job_list, freeit);
+ }
+ error("Block %s was in a ready state "
+ "for user %s but is being freed. "
+ "Job %d was lost.",
+ bg_record->bg_block_id,
+ jobinfo->user_name,
+ job_ptr->job_id);
+ }
+ list_iterator_destroy(itr);
} else {
debug("Block %s was in a ready state "
"but is being freed. No job running.",
bg_record->bg_block_id);
+ /* Make sure block is cleaned up. If there are
+ * running jobs on the block this happens when they
+ * are cleaned off. */
+ bg_reset_block(bg_record, NULL);
}
- if (remove_from_bg_list(bg_lists->job_running, bg_record)
- == SLURM_SUCCESS)
- num_unused_cpus += bg_record->cpu_cnt;
remove_from_bg_list(bg_lists->booted, bg_record);
- xfree(user_name);
-
return SLURM_SUCCESS;
}
@@ -149,12 +151,8 @@
mpirun but we missed the state
change */
debug("Block %s skipped rebooting, "
- "but it really is. "
- "Setting target_name back to %s",
- bg_record->bg_block_id,
- bg_record->user_name);
- xfree(bg_record->target_name);
- bg_record->target_name = xstrdup(bg_record->user_name);
+ "but it really is.",
+ bg_record->bg_block_id);
} else if ((real_state == BG_BLOCK_TERM)
&& (state == BG_BLOCK_BOOTING))
/* This is a funky state IBM says
@@ -177,11 +175,14 @@
debug("Setting bootflag for %s", bg_record->bg_block_id);
bg_record->boot_state = 1;
} else if (real_state == BG_BLOCK_FREE) {
- if (remove_from_bg_list(bg_lists->job_running, bg_record)
- == SLURM_SUCCESS)
- num_unused_cpus += bg_record->cpu_cnt;
- remove_from_bg_list(bg_lists->booted,
- bg_record);
+ /* Make sure block is cleaned up. If there are
+ * running jobs on the block this happens when they
+ * are cleaned off. */
+ if (bg_record->job_running == NO_JOB_RUNNING
+ && (!bg_record->job_list
+ || !list_count(bg_record->job_list)))
+ bg_reset_block(bg_record, NULL);
+ remove_from_bg_list(bg_lists->booted, bg_record);
} else if (real_state & BG_BLOCK_ERROR_FLAG) {
if (bg_record->boot_state)
error("Block %s in an error state while booting.",
@@ -196,6 +197,7 @@
list_push(bg_lists->booted, bg_record);
}
updated = 1;
+ last_bg_update = time(NULL);
nochange_state:
/* check the boot state */
@@ -216,16 +218,35 @@
switch (real_state) {
case BG_BLOCK_BOOTING:
- debug3("checking to make sure user %s "
- "is the user.",
- bg_record->target_name);
-
- if (update_block_user(bg_record, 0) == 1)
- last_bg_update = time(NULL);
- if (bg_record->job_ptr) {
+ if (bg_record->job_ptr
+ && !IS_JOB_CONFIGURING(bg_record->job_ptr)) {
+ debug3("Setting job %u on block %s "
+ "to configuring",
+ bg_record->job_ptr->job_id,
+ bg_record->bg_block_id);
bg_record->job_ptr->job_state |=
JOB_CONFIGURING;
last_job_update = time(NULL);
+ } else if (bg_record->job_list
+ && list_count(bg_record->job_list)) {
+ struct job_record *job_ptr;
+ ListIterator job_itr =
+ list_iterator_create(
+ bg_record->job_list);
+ while ((job_ptr = list_next(job_itr))) {
+ if (job_ptr->magic != JOB_MAGIC) {
+ error("bg_status_update_"
+ "block_state: 1 "
+ "bad magic found when "
+ "looking at block %s",
+ bg_record->bg_block_id);
+ list_delete_item(job_itr);
+ continue;
+ }
+ job_ptr->job_state |= JOB_CONFIGURING;
+ }
+ list_iterator_destroy(job_itr);
+ last_job_update = time(NULL);
}
break;
case BG_BLOCK_FREE:
@@ -242,20 +263,23 @@
char *reason = (char *)
"status_check: Boot fails ";
- error("Couldn't boot Block %s for user %s",
- bg_record->bg_block_id,
- bg_record->target_name);
+ error("Couldn't boot Block %s",
+ bg_record->bg_block_id);
+ /* We can't push on the kill_job_list
+ here since we have to put this
+ block in an error and that means
+ the killing has to take place
+ before the erroring of the block.
+ */
slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
requeue_and_error(bg_record, reason);
+ lock_slurmctld(job_read_lock);
slurm_mutex_lock(&block_state_mutex);
bg_record->boot_state = 0;
bg_record->boot_count = 0;
- if (remove_from_bg_list(
- bg_lists->job_running, bg_record)
- == SLURM_SUCCESS)
- num_unused_cpus += bg_record->cpu_cnt;
remove_from_bg_list(bg_lists->booted,
bg_record);
@@ -264,14 +288,40 @@
case BG_BLOCK_INITED:
debug("block %s is ready.",
bg_record->bg_block_id);
- if (bg_record->job_ptr) {
+ if (bg_record->job_ptr
+ && IS_JOB_CONFIGURING(bg_record->job_ptr)) {
bg_record->job_ptr->job_state &=
(~JOB_CONFIGURING);
last_job_update = time(NULL);
+ } else if (bg_record->job_list
+ && list_count(bg_record->job_list)) {
+ struct job_record *job_ptr;
+ ListIterator job_itr =
+ list_iterator_create(
+ bg_record->job_list);
+ while ((job_ptr = list_next(job_itr))) {
+ if (job_ptr->magic != JOB_MAGIC) {
+ error("bg_status_update_"
+ "block_state: 2 "
+ "bad magic found when "
+ "looking at block %s",
+ bg_record->bg_block_id);
+ list_delete_item(job_itr);
+ continue;
+ }
+ job_ptr->job_state &=
+ (~JOB_CONFIGURING);
+ }
+ list_iterator_destroy(job_itr);
+ last_job_update = time(NULL);
}
- /* boot flags are reset here */
+
+ bg_record->boot_state = 0;
+ bg_record->boot_count = 0;
+
if (kill_job_list &&
- set_block_user(bg_record) == SLURM_ERROR) {
+ bridge_block_sync_users(bg_record)
+ == SLURM_ERROR) {
freeit = (kill_job_struct_t *)
xmalloc(sizeof(kill_job_struct_t));
freeit->jobid = bg_record->job_running;
@@ -306,7 +356,8 @@
return list_create(_destroy_kill_struct);
}
-extern void bg_status_process_kill_job_list(List kill_job_list)
+extern void bg_status_process_kill_job_list(List kill_job_list,
+ bool slurmctld_locked)
{
kill_job_struct_t *freeit = NULL;
@@ -316,7 +367,7 @@
/* kill all the jobs from unexpectedly freed blocks */
while ((freeit = list_pop(kill_job_list))) {
debug2("Trying to requeue job %u", freeit->jobid);
- bg_requeue_job(freeit->jobid, 0);
+ bg_requeue_job(freeit->jobid, 0, slurmctld_locked);
_destroy_kill_struct(freeit);
}
}
diff --git a/src/plugins/select/bluegene/bg_status.h b/src/plugins/select/bluegene/bg_status.h
index cce5e32..715d512 100644
--- a/src/plugins/select/bluegene/bg_status.h
+++ b/src/plugins/select/bluegene/bg_status.h
@@ -40,14 +40,23 @@
#include "bg_core.h"
+typedef struct {
+ uint32_t jobid;
+} kill_job_struct_t;
+
extern int bg_status_update_block_state(bg_record_t *bg_record,
uint16_t state,
List kill_job_list);
extern List bg_status_create_kill_job_list(void);
-extern void bg_status_process_kill_job_list(List kill_job_list);
+extern void bg_status_process_kill_job_list(List kill_job_list,
+ bool slurmctld_locked);
/* defined in the various bridge_status' */
+extern int bridge_status_init(void);
+
extern int bridge_block_check_mp_states(char *bg_block_id,
bool slurmctld_locked);
+/* This needs to have block_state_mutex locked before hand. */
+extern int bridge_status_update_block_list_state(List block_list);
#endif
diff --git a/src/plugins/select/bluegene/bg_structs.h b/src/plugins/select/bluegene/bg_structs.h
index 0d189b0..fe26596 100644
--- a/src/plugins/select/bluegene/bg_structs.h
+++ b/src/plugins/select/bluegene/bg_structs.h
@@ -50,6 +50,7 @@
#include "src/common/bitstring.h"
typedef struct {
+ uint32_t actual_cnodes_per_mp; /* used only on sub_mp_systems */
List blrts_list;
char *bridge_api_file;
uint16_t bridge_api_verb;
@@ -59,12 +60,14 @@
char *default_linuximage;
char *default_mloaderimage;
char *default_ramdiskimage;
+ uint16_t default_conn_type[SYSTEM_DIMENSIONS];
uint16_t deny_pass;
double io_ratio;
uint16_t ionode_cnode_cnt;
uint16_t ionodes_per_mp;
bg_layout_t layout_mode;
List linux_list;
+ uint16_t max_block_err;
List mloader_list;
uint16_t mp_cnode_cnt;
uint16_t mp_nodecard_cnt;
@@ -79,6 +82,8 @@
char *slurm_node_prefix;
char *slurm_user_name;
uint32_t smallest_block;
+ uint16_t sub_blocks;
+ uint16_t sub_mp_sys;
} bg_config_t;
typedef struct {
@@ -92,6 +97,15 @@
} bg_lists_t;
typedef struct bg_record {
+ uint16_t action; /* Any action that might be on
+ the block. At the moment,
+ don't pack. */
+ bool avail_set; /* Used in sorting, don't copy
+ or pack. */
+ uint32_t avail_cnode_cnt; /* Used in sorting, don't copy
+ or pack. */
+ time_t avail_job_end; /* Used in sorting, don't copy
+ or pack. */
void *bg_block; /* needed for L/P systems */
char *bg_block_id; /* ID returned from MMCS */
List ba_mp_list; /* List of midplanes in block */
@@ -102,8 +116,14 @@
0 = not booting,
1 = booting */
uint32_t cnode_cnt; /* count of cnodes per block */
+ uint32_t cnode_err_cnt; /* count of cnodes in error on
+ block */
uint16_t conn_type[SYSTEM_DIMENSIONS]; /* MESH or Torus or NAV */
uint32_t cpu_cnt; /* count of cpus per block */
+ int destroy; /* if the block is being destroyed */
+ uint16_t err_ratio; /* ratio of how much of this
+ block is in an error
+ state. (doesn't apply to BGL/P) */
int free_cnt; /* How many are trying
to free this block at the
same time */
@@ -115,8 +135,8 @@
are on. NULL if not a small block*/
char *ionode_str; /* String of ionodes in block
* NULL if not a small block*/
- List job_list; /* List of jobs running on a
- small block */
+ List job_list; /* List of job records running on a
+ block that allows multiple jobs */
struct job_record *job_ptr; /* pointer to job running on
* block or NULL if no job */
int job_running; /* job id of job running of if
@@ -133,8 +153,6 @@
of block */
int mp_count; /* size */
char *mp_str; /* String of midplanes in block */
- bitstr_t *mp_used_bitmap; /* cnodes used in this bitmap */
- char *mp_used_str; /* String of midplanes used in block */
uint16_t node_use; /* either COPROCESSOR or VIRTUAL */
struct bg_record *original; /* if this is a copy this is a
pointer to the original */
@@ -143,13 +161,14 @@
char *reason; /* reason block is in error state */
uint16_t state; /* Current state of the block */
uint16_t start[SYSTEM_DIMENSIONS]; /* start node */
+ uint16_t start_small[HIGHEST_DIMENSIONS]; /* On a small block
+ * what the starting
+ * cnode is to
+ * figure out the
+ * relative position
+ * of jobs */
uint32_t switch_count; /* number of switches
* used. On L/P */
- char *target_name; /* when a block is freed this
- is the name of the user we
- want on the block */
- char *user_name; /* user using the block */
- uid_t user_uid; /* Owner of block uid */
} bg_record_t;
#endif
diff --git a/src/plugins/select/bluegene/bl/Makefile.am b/src/plugins/select/bluegene/bl/Makefile.am
index 3a93f16..5ad4bcc 100644
--- a/src/plugins/select/bluegene/bl/Makefile.am
+++ b/src/plugins/select/bluegene/bl/Makefile.am
@@ -8,7 +8,11 @@
# making a .la
noinst_LTLIBRARIES = libbridge_linker.la
-libbridge_linker_la_SOURCES = bridge_linker.c bridge_status.c \
- bridge_switch_connections.c
+libbridge_linker_la_SOURCES = \
+ bridge_linker.c \
+ bridge_status.c \
+ bridge_status.h \
+ bridge_switch_connections.c \
+ bridge_switch_connections.h
libbridge_linker_la_LDFLAGS = $(LIB_LDFLAGS) -lm
diff --git a/src/plugins/select/bluegene/bl/Makefile.in b/src/plugins/select/bluegene/bl/Makefile.in
index 7908e44..346354d 100644
--- a/src/plugins/select/bluegene/bl/Makefile.in
+++ b/src/plugins/select/bluegene/bl/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -187,6 +187,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -223,6 +224,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -316,8 +318,12 @@
# making a .la
noinst_LTLIBRARIES = libbridge_linker.la
-libbridge_linker_la_SOURCES = bridge_linker.c bridge_status.c \
- bridge_switch_connections.c
+libbridge_linker_la_SOURCES = \
+ bridge_linker.c \
+ bridge_status.c \
+ bridge_status.h \
+ bridge_switch_connections.c \
+ bridge_switch_connections.h
libbridge_linker_la_LDFLAGS = $(LIB_LDFLAGS) -lm
all: all-am
@@ -363,7 +369,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libbridge_linker.la: $(libbridge_linker_la_OBJECTS) $(libbridge_linker_la_DEPENDENCIES)
+libbridge_linker.la: $(libbridge_linker_la_OBJECTS) $(libbridge_linker_la_DEPENDENCIES) $(EXTRA_libbridge_linker_la_DEPENDENCIES)
$(libbridge_linker_la_LINK) $(libbridge_linker_la_OBJECTS) $(libbridge_linker_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -499,10 +505,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/bluegene/bl/bridge_linker.c b/src/plugins/select/bluegene/bl/bridge_linker.c
index 06ac3f7..abfd8bd 100644
--- a/src/plugins/select/bluegene/bl/bridge_linker.c
+++ b/src/plugins/select/bluegene/bl/bridge_linker.c
@@ -493,10 +493,6 @@
slurm_mutex_lock(&block_state_mutex);
bg_record = find_bg_record_in_list(bg_lists->main, block_id);
if (bg_record) {
- debug("got the record %s user is %s",
- bg_record->bg_block_id,
- bg_record->user_name);
-
if (job_remove_failed) {
if (bg_record->mp_str)
slurm_drain_nodes(
@@ -508,7 +504,7 @@
block_id);
}
- bg_reset_block(bg_record);
+ bg_reset_block(bg_record, NULL);
} else if (bg_conf->layout_mode == LAYOUT_DYNAMIC) {
debug2("Hopefully we are destroying this block %s "
"since it isn't in the bg_lists->main",
@@ -613,7 +609,6 @@
#if defined HAVE_BG_FILES
int i;
pm_partition_id_t block_id;
- uid_t my_uid;
/* Add partition record to the DB */
debug2("adding block");
@@ -654,20 +649,6 @@
free(block_id);
- xfree(bg_record->target_name);
-
-
- bg_record->target_name =
- xstrdup(bg_conf->slurm_user_name);
-
- xfree(bg_record->user_name);
- bg_record->user_name =
- xstrdup(bg_conf->slurm_user_name);
-
- if (uid_from_string (bg_record->user_name, &my_uid) < 0)
- error("uid_from_string(%s): %m", bg_record->user_name);
- else
- bg_record->user_uid = my_uid;
}
/* We are done with the block */
if ((rc = bridge_free_block(bg_record->bg_block)) != SLURM_SUCCESS)
@@ -1435,7 +1416,6 @@
#else
fatal("No BG_SERIAL is set, can't run.");
#endif
- bridge_status_init();
#endif
return 1;
@@ -1666,7 +1646,7 @@
#endif
}
-extern int bridge_block_add_user(bg_record_t *bg_record, char *user_name)
+extern int bridge_block_add_user(bg_record_t *bg_record, const char *user_name)
{
#if defined HAVE_BG_FILES
int rc = BG_ERROR_CONNECTION_ERROR;
@@ -1683,7 +1663,8 @@
#endif
}
-extern int bridge_block_remove_user(bg_record_t *bg_record, char *user_name)
+extern int bridge_block_remove_user(bg_record_t *bg_record,
+ const char *user_name)
{
#if defined HAVE_BG_FILES
int rc = BG_ERROR_CONNECTION_ERROR;
@@ -1700,14 +1681,14 @@
#endif
}
-extern int bridge_block_remove_all_users(bg_record_t *bg_record,
- char *user_name)
+extern int bridge_block_sync_users(bg_record_t *bg_record)
{
- int returnc = REMOVE_USER_NONE;
+ int returnc = SLURM_SUCCESS;
#ifdef HAVE_BG_FILES
char *user;
rm_partition_t *block_ptr = NULL;
- int rc, i, user_count;
+ int rc, i, user_count, found=0;
+ char *user_name = NULL;
/* We can't use bridge_get_block_info here because users are
filled in there. This function is very slow but necessary
@@ -1716,7 +1697,7 @@
!= SLURM_SUCCESS) {
if (rc == BG_ERROR_INCONSISTENT_DATA
&& bg_conf->layout_mode == LAYOUT_DYNAMIC)
- return REMOVE_USER_FOUND;
+ return SLURM_SUCCESS;
error("bridge_get_block(%s): %s",
bg_record->bg_block_id,
@@ -1735,6 +1716,13 @@
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
info("got %d users for %s", user_count,
bg_record->bg_block_id);
+
+ if (bg_record->job_ptr) {
+ select_jobinfo_t *jobinfo =
+ bg_record->job_ptr->select_jobinfo->data;
+ user_name = jobinfo->user_name;
+ }
+
for(i=0; i<user_count; i++) {
if (i) {
if ((rc = bridge_get_data(block_ptr,
@@ -1759,23 +1747,29 @@
break;
}
}
+
if (!user) {
error("No user was returned from database");
continue;
}
- if (!strcmp(user, bg_conf->slurm_user_name)) {
+
+ /* It has been found on L that the block owner is not
+ needed as a regular user so we are now removing
+ it. It is unknown if this is the case for P but we
+ believe it is. If a problem does arise on P please
+ report and just uncomment this check.
+ */
+ /* if (!strcmp(user, bg_conf->slurm_user_name)) { */
+ /* free(user); */
+ /* continue; */
+ /* } */
+
+ if (user_name && !strcmp(user, user_name)) {
+ found=1;
free(user);
continue;
}
- if (user_name) {
- if (!strcmp(user, user_name)) {
- returnc = REMOVE_USER_FOUND;
- free(user);
- continue;
- }
- }
-
info("Removing user %s from Block %s",
user, bg_record->bg_block_id);
if ((rc = _remove_block_user(bg_record->bg_block_id, user))
@@ -1786,6 +1780,17 @@
}
free(user);
}
+
+ // no users currently, or we didn't find ourselves in the lookup
+ if (!found && user_name) {
+ returnc = REMOVE_USER_FOUND;
+ if ((rc = bridge_block_add_user(bg_record, user_name))
+ != SLURM_SUCCESS) {
+ debug("couldn't add user %s to block %s",
+ user, bg_record->bg_block_id);
+ }
+ }
+
if ((rc = bridge_free_block(block_ptr)) != SLURM_SUCCESS) {
error("bridge_free_block(): %s", bg_err_str(rc));
}
@@ -1803,9 +1808,7 @@
int mp_cnt;
rm_partition_t *block_ptr = NULL;
- char *user_name = NULL;
bg_record_t *bg_record = NULL;
- uid_t my_uid;
int block_number, block_count;
char *bg_block_id = NULL;
@@ -1915,107 +1918,13 @@
bg_err_str(rc));
continue;
}
-
- xfree(bg_record->user_name);
- xfree(bg_record->target_name);
-
- if (mp_cnt==0) {
- bg_record->user_name =
- xstrdup(bg_conf->slurm_user_name);
- bg_record->target_name =
- xstrdup(bg_conf->slurm_user_name);
- } else {
- user_name = NULL;
- if ((rc = bridge_get_data(block_ptr,
- RM_PartitionFirstUser,
- &user_name))
- != SLURM_SUCCESS) {
- error("bridge_get_data"
- "(RM_PartitionFirstUser): %s",
- bg_err_str(rc));
- continue;
- }
- if (!user_name) {
- error("No user name was "
- "returned from database");
- continue;
- }
- bg_record->user_name = xstrdup(user_name);
-
- if (!bg_record->boot_state)
- bg_record->target_name =
- xstrdup(bg_conf->slurm_user_name);
- else
- bg_record->target_name = xstrdup(user_name);
- free(user_name);
- }
- if (uid_from_string (bg_record->user_name, &my_uid)<0){
- error("uid_from_string(%s): %m",
- bg_record->user_name);
- } else {
- bg_record->user_uid = my_uid;
- }
}
bridge_free_block_list(block_list);
#endif
return rc;
}
-extern void bridge_reset_block_list(List block_list)
-{
- ListIterator itr = NULL;
- bg_record_t *bg_record = NULL;
- rm_job_list_t *job_list = NULL;
- int jobs = 0;
-
-#if defined HAVE_BG_FILES
- int live_states, rc;
-#endif
-
- if (!block_list)
- return;
-
-#if defined HAVE_BG_FILES
- debug2("getting the job info");
- live_states = JOB_ALL_FLAG
- & (~JOB_TERMINATED_FLAG)
- & (~JOB_KILLED_FLAG)
- & (~JOB_ERROR_FLAG);
-
- if ((rc = _get_jobs(live_states, &job_list)) != SLURM_SUCCESS) {
- error("bridge_get_jobs(): %s", bg_err_str(rc));
-
- return;
- }
-
- if ((rc = bridge_get_data(job_list, RM_JobListSize, &jobs))
- != SLURM_SUCCESS) {
- error("bridge_get_data(RM_JobListSize): %s", bg_err_str(rc));
- jobs = 0;
- }
- debug2("job count %d",jobs);
-#endif
- itr = list_iterator_create(block_list);
- while ((bg_record = list_next(itr))) {
- info("Queue clearing of users of BG block %s",
- bg_record->bg_block_id);
-#ifndef HAVE_BG_FILES
- /* simulate jobs running and need to be cleared from MMCS */
- if (bg_record->job_ptr)
- jobs = 1;
-#endif
- _remove_jobs_on_block_and_reset(job_list, jobs,
- bg_record->bg_block_id);
- }
- list_iterator_destroy(itr);
-
-#if defined HAVE_BG_FILES
- if ((rc = _free_job_list(job_list)) != SLURM_SUCCESS)
- error("bridge_free_job_list(): %s", bg_err_str(rc));
-#endif
-}
-
-extern void bridge_block_post_job(char *bg_block_id)
+extern void bridge_block_post_job(char *bg_block_id, struct job_record *job_ptr)
{
int jobs = 0;
rm_job_list_t *job_list = NULL;
@@ -2042,8 +1951,25 @@
jobs = 0;
}
debug2("job count %d",jobs);
+#else
+ /* simulate jobs running and need to be cleared from MMCS */
+ jobs = 1;
#endif
_remove_jobs_on_block_and_reset(job_list, jobs, bg_block_id);
+ if (job_ptr) {
+ slurmctld_lock_t job_read_lock =
+ { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
+ lock_slurmctld(job_read_lock);
+ if (job_ptr->magic == JOB_MAGIC) {
+ /* This signals the job purger that the job
+ actually finished in the system.
+ */
+ select_jobinfo_t *jobinfo =
+ job_ptr->select_jobinfo->data;
+ jobinfo->bg_record = NULL;
+ }
+ unlock_slurmctld(job_read_lock);
+ }
#if defined HAVE_BG_FILES
if ((rc = _free_job_list(job_list)) != SLURM_SUCCESS)
@@ -2077,6 +2003,16 @@
}
+extern uint16_t bridge_block_get_action(char *bg_block_id)
+{
+ return BG_BLOCK_ACTION_NONE;
+}
+
+extern int bridge_check_nodeboards(char *mp_loc)
+{
+ return 0;
+}
+
extern int bridge_set_log_params(char *api_file_name, unsigned int level)
{
static FILE *fp = NULL;
diff --git a/src/plugins/select/bluegene/bl/bridge_status.c b/src/plugins/select/bluegene/bl/bridge_status.c
index b957ed6..656d73f 100644
--- a/src/plugins/select/bluegene/bl/bridge_status.c
+++ b/src/plugins/select/bluegene/bl/bridge_status.c
@@ -272,7 +272,7 @@
/* we have to handle each nodecard separately to make
sure we don't create holes in the system */
- if (down_nodecard(node_name, io_start, slurmctld_locked)
+ if (down_nodecard(node_name, io_start, slurmctld_locked, NULL)
== SLURM_SUCCESS) {
debug("nodecard %s on %s is in an error state",
nc_name, node_name);
@@ -570,6 +570,7 @@
if (!bg_lists->main)
return updated;
+ lock_slurmctld(job_read_lock);
slurm_mutex_lock(&block_state_mutex);
itr = list_iterator_create(bg_lists->main);
while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) {
@@ -702,8 +703,9 @@
}
list_iterator_destroy(itr);
slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
- bg_status_process_kill_job_list(kill_job_list);
+ bg_status_process_kill_job_list(kill_job_list, 0);
#endif
return updated;
diff --git a/src/plugins/select/bluegene/bl/bridge_status.h b/src/plugins/select/bluegene/bl/bridge_status.h
index 39cbb99..5cecc5d 100644
--- a/src/plugins/select/bluegene/bl/bridge_status.h
+++ b/src/plugins/select/bluegene/bl/bridge_status.h
@@ -39,10 +39,6 @@
#ifndef _BRIDGE_STATUS_H_
#define _BRIDGE_STATUS_H_
-extern int bridge_status_init(void);
extern int bridge_status_fini(void);
-/* This needs to have block_state_mutex locked before hand. */
-extern int bridge_status_update_block_list_state(List block_list);
-
#endif
diff --git a/src/plugins/select/bluegene/bl_bgq/Makefile.in b/src/plugins/select/bluegene/bl_bgq/Makefile.in
index 6f226ca..92e2eef 100644
--- a/src/plugins/select/bluegene/bl_bgq/Makefile.in
+++ b/src/plugins/select/bluegene/bl_bgq/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -196,6 +196,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -232,6 +233,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -373,7 +375,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libbridge_linker.la: $(libbridge_linker_la_OBJECTS) $(libbridge_linker_la_DEPENDENCIES)
+libbridge_linker.la: $(libbridge_linker_la_OBJECTS) $(libbridge_linker_la_DEPENDENCIES) $(EXTRA_libbridge_linker_la_DEPENDENCIES)
$(libbridge_linker_la_LINK) $(libbridge_linker_la_OBJECTS) $(libbridge_linker_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -509,10 +511,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc b/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc
index 67d5503..0ed93e2 100644
--- a/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc
+++ b/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc
@@ -66,6 +66,10 @@
case bgsched::DatabaseErrors::ConnectionError:
error("%s: Can't connect to the database!", function);
break;
+ case bgsched::DatabaseErrors::UnexpectedError:
+ error("%s: UnexpectedError returned from the database!",
+ function);
+ break;
default:
error("%s: Unexpected Database exception value %d",
function, err);
@@ -150,6 +154,9 @@
case bgsched::InputErrors::InvalidNodeBoardCount:
error("%s: Invalid NodeBoard count.", function);
break;
+ case bgsched::InputErrors::InvalidNodeBoardPosition:
+ error("%s: Invalid NodeBoard position.", function);
+ break;
case bgsched::InputErrors::InvalidMidplanes:
error("%s: Invalid midplanes given.", function);
break;
@@ -220,13 +227,49 @@
switch (err) {
case bgsched::RuntimeErrors::BlockBootError:
- error("%s: Error booting block %s.", function,
- bg_record->bg_block_id);
+ {
+ BlockFilter filter;
+ Block::Ptrs vec;
+
rc = BG_ERROR_BOOT_ERROR;
+
+ if ((bg_record->magic != BLOCK_MAGIC)
+ || !bg_record->bg_block_id) {
+ error("%s: bad block given to booting.", function);
+ break;
+ }
+
+ filter.setName(string(bg_record->bg_block_id));
+
+ vec = bridge_get_blocks(filter);
+ if (vec.empty()) {
+ debug("%s: block %s not found, removing "
+ "from slurm", function, bg_record->bg_block_id);
+ break;
+ }
+ const Block::Ptr &block_ptr = *(vec.begin());
+ uint16_t state = bridge_translate_status(
+ block_ptr->getStatus().toValue());
+ if (state == BG_BLOCK_FREE) {
+ error("%s: Block %s was free but we got an error "
+ "while trying to boot it. (system=%s) (us=%s)",
+ function, bg_record->bg_block_id,
+ bg_block_state_string(state),
+ bg_block_state_string(bg_record->state));
+ } else {
+ debug2("%s: tring to boot a block %s that wasn't "
+ "free (system=%s) (us=%s), no real error.",
+ function, bg_record->bg_block_id,
+ bg_block_state_string(state),
+ bg_block_state_string(bg_record->state));
+ rc = SLURM_SUCCESS;
+ }
+
break;
+ }
case bgsched::RuntimeErrors::BlockFreeError:
/* not a real error */
- rc = BG_ERROR_INVALID_STATE;
+ rc = BG_ERROR_FREE;
debug2("%s: Error freeing block %s.", function,
bg_record->bg_block_id);
break;
@@ -251,6 +294,9 @@
case bgsched::RuntimeErrors::AuthorityError:
error("%s: Authority Error.", function);
break;
+ case bgsched::RuntimeErrors::HardwareInUseError:
+ error("%s: Hardware in use Error.", function);
+ break;
default:
error("%s: Unexpected Runtime exception value %d.",
function, err);
@@ -258,6 +304,141 @@
return rc;
}
+/* RealTime errors */
+
+extern int bridge_handle_realtime_client_errors(const char *function,
+ const uint32_t err)
+{
+ int rc = SLURM_ERROR;
+
+ switch (err) {
+ case bgsched::realtime::ClientStateErrors::MustBeConnected:
+ error("%s: The real-time client must be connected before "
+ "this method is called, and apparently you are not",
+ function);
+ break;
+ default:
+ error("%s: Unexpected Realtime client error: %d.",
+ function, err);
+ }
+ return rc;
+}
+
+extern int bridge_handle_realtime_configuration_errors(const char *function,
+ const uint32_t err)
+{
+ int rc = SLURM_ERROR;
+
+ switch (err) {
+ case bgsched::realtime::ConfigurationErrors::InvalidHost:
+ error("%s: The host value given is not in the correct format",
+ function);
+ break;
+ case bgsched::realtime::ConfigurationErrors::MissingSecurityProperty:
+ error("%s: A required security configuration property is "
+ "missing from the bg.properties file",
+ function);
+ break;
+ default:
+ error("%s: Unexpected Realtime Configuration error: %d.",
+ function, err);
+ }
+ return rc;
+}
+
+extern int bridge_handle_realtime_connection_errors(const char *function,
+ const uint32_t err)
+{
+ int rc = SLURM_ERROR;
+
+ switch (err) {
+ case bgsched::realtime::ConnectionErrors::CannotResolve:
+ error("%s: Cannot resolve the real-time server host or port",
+ function);
+ break;
+ case bgsched::realtime::ConnectionErrors::CannotConnect:
+ error("%s: Cannot connect to the real-time server",
+ function);
+ break;
+ case bgsched::realtime::ConnectionErrors::LostConnection:
+ error("%s: Unexpectedly lost the connection to the "
+ "real-time server",
+ function);
+ break;
+ default:
+ error("%s: Unexpected Realtime Connection error: %d.",
+ function, err);
+ }
+ return rc;
+}
+
+extern int bridge_handle_realtime_filter_errors(const char *function,
+ const uint32_t err)
+{
+ int rc = SLURM_ERROR;
+
+ switch (err) {
+ case bgsched::realtime::FilterErrors::PatternNotValid:
+ error("%s: The pattern supplied to the filter option "
+ "is not valid", function);
+ break;
+ default:
+ error("%s: Unexpected Realtime Filter error: %d.",
+ function, err);
+ }
+ return rc;
+}
+
+extern int bridge_handle_realtime_internal_errors(const char *function,
+ const uint32_t err)
+{
+ int rc = SLURM_ERROR;
+
+ switch (err) {
+ case bgsched::realtime::InternalErrors::ApiUnexpectedFailure:
+ error("%s: An API called by the real-time client "
+ "failed in an unexpected way.", function);
+ break;
+ default:
+ error("%s: Unexpected Realtime Internal error: %d.",
+ function, err);
+ }
+ return rc;
+}
+
+extern int bridge_handle_realtime_protocol_errors(const char *function,
+ const uint32_t err)
+{
+ int rc = SLURM_ERROR;
+
+ switch (err) {
+ case bgsched::realtime::ProtocolErrors::MessageTooLong:
+ error("%s: A message received from the real-time server is "
+ "too long", function);
+ break;
+ case bgsched::realtime::ProtocolErrors::UnexpectedMessageType:
+ error("%s: The type of message received from the real-time "
+ "server is not expected", function);
+ break;
+ case bgsched::realtime::ProtocolErrors::ErrorReadingMessage:
+ error("%s: An error occurred parsing a message received "
+ "from the real-time server", function);
+ break;
+ case bgsched::realtime::ProtocolErrors::UnexpectedDbChangeType:
+ error("%s: The type of DB change message received "
+ "from the real-time server is not expected", function);
+ break;
+ case bgsched::realtime::ProtocolErrors::MessageNotValid:
+ error("%s: A message received from the real-time server "
+ "is not valid", function);
+ break;
+ default:
+ error("%s: Unexpected Realtime Protocol error: %d.",
+ function, err);
+ }
+ return rc;
+}
+
extern uint16_t bridge_translate_status(bgsched::Block::Status state_in)
{
switch (state_in) {
@@ -284,6 +465,28 @@
return BG_BLOCK_NAV;
}
+#if defined HAVE_BG_GET_ACTION
+extern uint16_t bridge_translate_action(bgsched::Block::Action::Value action_in)
+{
+ switch (action_in) {
+ case Block::Action::None:
+ return BG_BLOCK_ACTION_NONE;
+ break;
+ case Block::Action::Boot:
+ return BG_BLOCK_ACTION_BOOT;
+ break;
+ case Block::Action::Free:
+ return BG_BLOCK_ACTION_FREE;
+ default:
+ error("unknown block action %d", action_in);
+ return BG_BLOCK_ACTION_NAV;
+ break;
+ }
+ error("unknown block action %d", action_in);
+ return BG_BLOCK_ACTION_NAV;
+}
+#endif
+
extern uint16_t bridge_translate_switch_usage(bgsched::Switch::InUse usage_in)
{
switch (usage_in) {
@@ -315,4 +518,156 @@
return BG_SWITCH_NONE;
}
+
+extern const char *bridge_hardware_state_string(const int state)
+{
+ switch(state) {
+ case Hardware::Available:
+ return "Available";
+ case Hardware::Missing:
+ return "Missing";
+ case Hardware::Error:
+ return "Error";
+ case Hardware::Service:
+ return "Service";
+ case Hardware::SoftwareFailure:
+ return "SoftwareFailure";
+ default:
+ return "Unknown";
+ }
+ return "Unknown";
+}
+
+/* helper functions */
+
+extern Block::Ptrs bridge_get_blocks(BlockFilter filter)
+{
+ Block::Ptrs vec;
+
+ try {
+ vec = getBlocks(filter);
+ } catch (const bgsched::DatabaseException& err) {
+ bridge_handle_database_errors("getBlocks",
+ err.getError().toValue());
+ } catch (const bgsched::InternalException& err) {
+ bridge_handle_internal_errors("getBlocks",
+ err.getError().toValue());
+ } catch (const bgsched::RuntimeException& err) {
+ bridge_handle_runtime_errors("getBlocks",
+ err.getError().toValue(),
+ NULL);
+ } catch (...) {
+ error("Unknown error from getBlocks().");
+ }
+
+ return vec;
+}
+
+extern Midplane::ConstPtr bridge_get_midplane(ComputeHardware::ConstPtr bgqsys,
+ ba_mp_t *ba_mp)
+{
+ Midplane::ConstPtr mp_ptr;
+
+ assert(ba_mp);
+
+ try {
+ Coordinates::Coordinates coords(
+ ba_mp->coord[0], ba_mp->coord[1],
+ ba_mp->coord[2], ba_mp->coord[3]);
+ mp_ptr = bgqsys->getMidplane(coords);
+ } catch (const bgsched::InputException& err) {
+ bridge_handle_input_errors(
+ "ComputeHardware::getMidplane",
+ err.getError().toValue(), NULL);
+ } catch (...) {
+ error("Unknown error from ComputeHardware::getMidplane.");
+ }
+ return mp_ptr;
+}
+
+extern Node::ConstPtrs bridge_get_midplane_nodes(const std::string& loc)
+{
+ Node::ConstPtrs vec;
+
+ try {
+ vec = getMidplaneNodes(loc);
+ } catch (const bgsched::DatabaseException& err) {
+ bridge_handle_database_errors("getMidplaneNodes",
+ err.getError().toValue());
+ } catch (const bgsched::InputException& err) {
+ bridge_handle_input_errors("getMidplaneNodes",
+ err.getError().toValue(),
+ NULL);
+ } catch (const bgsched::InternalException& err) {
+ bridge_handle_internal_errors("getMidplaneNodes",
+ err.getError().toValue());
+ } catch (...) {
+ error("Unknown error from getMidplaneNodes.");
+ }
+ return vec;
+}
+
+extern NodeBoard::ConstPtr bridge_get_nodeboard(Midplane::ConstPtr mp_ptr,
+ int nodeboard_num)
+{
+ NodeBoard::ConstPtr nb_ptr;
+
+ try {
+ nb_ptr = mp_ptr->getNodeBoard(nodeboard_num);
+ } catch (const bgsched::InputException& err) {
+ bridge_handle_input_errors("Midplane::getNodeBoard",
+ err.getError().toValue(),
+ NULL);
+ } catch (...) {
+ error("Unknown error from Midplane::getNodeBoard.");
+ }
+ return nb_ptr;
+}
+
+extern NodeBoard::ConstPtrs bridge_get_nodeboards(const std::string& mp_loc)
+{
+ NodeBoard::ConstPtrs nb_ptr;
+
+ try {
+ nb_ptr = getNodeBoards(mp_loc);
+ } catch (const bgsched::InputException& err) {
+ bridge_handle_input_errors("getNodeBoards",
+ err.getError().toValue(),
+ NULL);
+ } catch (...) {
+ error("Unknown error from getNodeBoards.");
+ }
+ return nb_ptr;
+}
+
+extern Switch::ConstPtr bridge_get_switch(Midplane::ConstPtr mp_ptr, int dim)
+{
+ Switch::ConstPtr switch_ptr;
+
+ try {
+ switch_ptr = mp_ptr->getSwitch(dim);
+ } catch (const bgsched::InputException& err) {
+ bridge_handle_input_errors("Midplane::getSwitch",
+ err.getError().toValue(),
+ NULL);
+ } catch (...) {
+ error("Unknown error from Midplane::getSwitch.");
+ }
+ return switch_ptr;
+}
+
+extern ComputeHardware::ConstPtr bridge_get_compute_hardware()
+{
+ ComputeHardware::ConstPtr bgqsys;
+
+ try {
+ bgqsys = getComputeHardware();
+ } catch (const bgsched::InternalException& err) {
+ bridge_handle_internal_errors("getComputeHardware",
+ err.getError().toValue());
+ } catch (...) {
+ error("Unknown error from getComputeHardware");
+ }
+ return bgqsys;
+}
#endif
diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_helper.h b/src/plugins/select/bluegene/bl_bgq/bridge_helper.h
index 51955c4..d01b057 100644
--- a/src/plugins/select/bluegene/bl_bgq/bridge_helper.h
+++ b/src/plugins/select/bluegene/bl_bgq/bridge_helper.h
@@ -56,12 +56,20 @@
#include <bgsched/Block.h>
#include <bgsched/core/core.h>
+#include <bgsched/realtime/ClientStateException.h>
+#include <bgsched/realtime/ConnectionException.h>
+#include <bgsched/realtime/InternalErrorException.h>
+#include <bgsched/realtime/ConfigurationException.h>
+#include <bgsched/realtime/FilterException.h>
+#include <bgsched/realtime/ProtocolException.h>
+
#include <boost/foreach.hpp>
using namespace std;
using namespace bgsched;
using namespace bgsched::core;
+/* core errors */
extern int bridge_handle_database_errors(
const char *function, const uint32_t err);
extern int bridge_handle_init_errors(
@@ -74,8 +82,39 @@
const uint32_t err,
bg_record_t *bg_record);
+/* realtime errors */
+extern int bridge_handle_realtime_client_errors(const char *function,
+ const uint32_t err);
+extern int bridge_handle_realtime_configuration_errors(const char *function,
+ const uint32_t err);
+extern int bridge_handle_realtime_connection_errors(const char *function,
+ const uint32_t err);
+extern int bridge_handle_realtime_filter_errors(const char *function,
+ const uint32_t err);
+extern int bridge_handle_realtime_internal_errors(const char *function,
+ const uint32_t err);
+extern int bridge_handle_realtime_protocol_errors(const char *function,
+ const uint32_t err);
+
+/* translate functions */
extern uint16_t bridge_translate_status(bgsched::Block::Status state_in);
+#if defined HAVE_BG_GET_ACTION
+extern uint16_t bridge_translate_action(
+ bgsched::Block::Action::Value action_in);
+#endif
extern uint16_t bridge_translate_switch_usage(bgsched::Switch::InUse usage_in);
+extern const char *bridge_hardware_state_string(const int state);
+
+/* helper functions */
+extern Block::Ptrs bridge_get_blocks(BlockFilter filter);
+extern Midplane::ConstPtr bridge_get_midplane(ComputeHardware::ConstPtr bgqsys,
+ ba_mp_t *ba_mp);
+extern Node::ConstPtrs bridge_get_midplane_nodes(const std::string& loc);
+extern NodeBoard::ConstPtr bridge_get_nodeboard(Midplane::ConstPtr mp_ptr,
+ int nodeboard_num);
+extern NodeBoard::ConstPtrs bridge_get_nodeboards(const std::string& mp_loc);
+extern Switch::ConstPtr bridge_get_switch(Midplane::ConstPtr mp_ptr, int dim);
+extern ComputeHardware::ConstPtr bridge_get_compute_hardware();
#endif
diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc b/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc
index 560f361..574b360 100644
--- a/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc
+++ b/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc
@@ -55,7 +55,6 @@
extern "C" {
#include "../ba_bgq/block_allocator.h"
-#include "../bg_record_functions.h"
#include "src/common/parse_time.h"
#include "src/common/uid.h"
}
@@ -69,32 +68,64 @@
#ifdef HAVE_BG_FILES
-static void _setup_ba_mp(ComputeHardware::ConstPtr bgq, ba_mp_t *ba_mp)
+// For future code
+//
+// static int _check_version(
+// const unsigned major, const unsigned minor, const unsigned micro)
+// {
+// if ((version::major > major)
+// || (version::major == major
+// && version::minor > minor)
+// || (version::major == major
+// && version::minor == minor
+// && version::mod >= micro))
+// return true;
+
+// return false;
+// }
+
+/* ba_system_mutex needs to be locked before coming here */
+static void _setup_ba_mp(int level, uint16_t *coords,
+ ComputeHardware::ConstPtr bgqsys)
{
- // int i;
- Coordinates::Coordinates coords(ba_mp->coord[A], ba_mp->coord[X],
- ba_mp->coord[Y], ba_mp->coord[Z]);
+ ba_mp_t *ba_mp;
Midplane::ConstPtr mp_ptr;
int i;
- try {
- mp_ptr = bgq->getMidplane(coords);
- } catch (const bgsched::InputException& err) {
- int rc = bridge_handle_input_errors(
- "ComputeHardware::getMidplane",
- err.getError().toValue(), NULL);
- if (rc != SLURM_SUCCESS)
+ if (!bgqsys) {
+ if (bg_recover != NOT_FROM_CONTROLLER)
+ fatal("_setup_ba_mp: No ComputeHardware ptr");
+ else {
+ error("_setup_ba_mp: can't talk to the database");
return;
+ }
}
+ if (level > SYSTEM_DIMENSIONS)
+ return;
+
+ if (level < SYSTEM_DIMENSIONS) {
+ for (coords[level] = 0;
+ coords[level] < DIM_SIZE[level];
+ coords[level]++) {
+ /* handle the outer dims here */
+ _setup_ba_mp(level+1, coords, bgqsys);
+ }
+ return;
+ }
+
+ if (!(ba_mp = coord2ba_mp(coords))
+ || !(mp_ptr = bridge_get_midplane(bgqsys, ba_mp)))
+ return;
ba_mp->loc = xstrdup(mp_ptr->getLocation().c_str());
ba_mp->nodecard_loc =
(char **)xmalloc(sizeof(char *) * bg_conf->mp_nodecard_cnt);
for (i=0; i<bg_conf->mp_nodecard_cnt; i++) {
- NodeBoard::ConstPtr nodeboard = mp_ptr->getNodeBoard(i);
- ba_mp->nodecard_loc[i] =
- xstrdup(nodeboard->getLocation().c_str());
+ NodeBoard::ConstPtr nb_ptr = bridge_get_nodeboard(mp_ptr, i);
+ if (nb_ptr)
+ ba_mp->nodecard_loc[i] =
+ xstrdup(nb_ptr->getLocation().c_str());
}
}
@@ -105,6 +136,7 @@
hostlist_t hostlist;
char *node_char = NULL;
char mp_str[256];
+ select_ba_request_t ba_request;
bg_record->magic = BLOCK_MAGIC;
bg_record->bg_block_id = xstrdup(block_ptr->getName().c_str());
@@ -133,16 +165,25 @@
bit_nset(bg_record->ionode_bitmap,
io_start, io_start+io_cnt);
bit_fmt(bitstring, BITSIZE, bg_record->ionode_bitmap);
- bg_record->ionode_str = xstrdup(bitstring);
- debug3("%s uses ionodes %s",
+ ba_set_ionode_str(bg_record);
+ debug3("%s uses cnodes %s",
bg_record->bg_block_id,
bg_record->ionode_str);
bg_record->conn_type[0] = SELECT_SMALL;
} else {
for (Dimension dim=Dimension::A; dim<=Dimension::D; dim++) {
- bg_record->conn_type[dim] =
- block_ptr->isTorus(dim) ?
- SELECT_TORUS : SELECT_MESH;
+ try {
+ bg_record->conn_type[dim] =
+ block_ptr->isTorus(dim) ?
+ SELECT_TORUS : SELECT_MESH;
+ } catch (const bgsched::InputException& err) {
+ bridge_handle_input_errors(
+ "Block::isTorus",
+ err.getError().toValue(),
+ NULL);
+ } catch (...) {
+ error("Unknown error from Block::isTorus.");
+ }
}
/* Set the bitmap blank here if it is a full
node we don't want anything set we also
@@ -154,6 +195,7 @@
hostlist = hostlist_create(NULL);
midplane_vec = block_ptr->getMidplanes();
+ slurm_mutex_lock(&ba_system_mutex);
BOOST_FOREACH(const std::string midplane, midplane_vec) {
char temp[256];
ba_mp_t *curr_mp = loc2ba_mp((char *)midplane.c_str());
@@ -168,13 +210,14 @@
hostlist_push(hostlist, temp);
}
+ slurm_mutex_unlock(&ba_system_mutex);
bg_record->mp_str = hostlist_ranged_string_xmalloc(hostlist);
hostlist_destroy(hostlist);
debug3("got nodes of %s", bg_record->mp_str);
process_nodes(bg_record, true);
- reset_ba_system(true);
+ reset_ba_system(false);
if (ba_set_removable_mps(bg_record->mp_bitmap, 1) != SLURM_SUCCESS)
fatal("It doesn't seem we have a bitmap for %s",
bg_record->bg_block_id);
@@ -184,10 +227,12 @@
else
bg_record->ba_mp_list = list_create(destroy_ba_mp);
- node_char = set_bg_block(bg_record->ba_mp_list,
- bg_record->start,
- bg_record->geo,
- bg_record->conn_type);
+ memset(&ba_request, 0, sizeof(ba_request));
+ memcpy(ba_request.geometry, bg_record->geo, sizeof(bg_record->geo));
+ memcpy(ba_request.conn_type, bg_record->conn_type,
+ sizeof(bg_record->conn_type));
+ node_char = set_bg_block(bg_record->ba_mp_list, &ba_request);
+ memcpy(bg_record->start, ba_request.start, sizeof(bg_record->start));
ba_reset_all_removed_mps();
if (!node_char)
fatal("I was unable to make the requested block.");
@@ -209,12 +254,13 @@
}
#endif
-static int _block_wait_for_jobs(char *bg_block_id)
+static int _block_wait_for_jobs(char *bg_block_id, struct job_record *job_ptr)
{
#ifdef HAVE_BG_FILES
std::vector<Job::ConstPtr> job_vec;
JobFilter job_filter;
JobFilter::Statuses job_statuses;
+ uint32_t job_id = 0;
#endif
if (!bridge_init(NULL))
@@ -225,6 +271,25 @@
return SLURM_ERROR;
}
+ /* This code can be used to simulate having a job hang in the
+ * database.
+ */
+ // if (job_ptr && (job_ptr->magic == JOB_MAGIC)) {
+ // uint32_t job_id = job_ptr->job_id;
+ // while (1) {
+ // debug("waiting on slurm job %u to "
+ // "finish on block %s",
+ // job_id, bg_block_id);
+ // sleep(3);
+ // if (job_ptr->magic != JOB_MAGIC) {
+ // info("bad magic");
+ // break;
+ // } else if (IS_JOB_COMPLETED(job_ptr)) {
+ // info("job completed");
+ // break;
+ // }
+ // }
+ // }
#ifdef HAVE_BG_FILES
job_filter.setComputeBlockName(bg_block_id);
@@ -237,14 +302,39 @@
job_statuses.insert(Job::Cleanup);
job_filter.setStatuses(&job_statuses);
- while (1) {
- job_vec = getJobs(job_filter);
- if (job_vec.empty())
- return SLURM_SUCCESS;
+ if (job_ptr && (job_ptr->magic == JOB_MAGIC)) {
+ char tmp_char[16];
+ job_id = job_ptr->job_id;
+ snprintf(tmp_char, sizeof(tmp_char), "%u", job_id);
+ job_filter.setSchedulerData(tmp_char);
+ }
- BOOST_FOREACH(const Job::ConstPtr& job_ptr, job_vec) {
- debug("waiting on mmcs job %lu to finish on block %s",
- job_ptr->getId(), bg_block_id);
+ while (1) {
+ try {
+ job_vec = getJobs(job_filter);
+ if (job_vec.empty())
+ return SLURM_SUCCESS;
+
+ BOOST_FOREACH(const Job::ConstPtr& job, job_vec) {
+ if (job_id)
+ debug("waiting on mmcs job %lu "
+ "in slurm job %u to "
+ "finish on block %s",
+ job->getId(), job_id,
+ bg_block_id);
+ else
+ debug("waiting on mmcs job %lu to "
+ "finish on block %s",
+ job->getId(), bg_block_id);
+ }
+ } catch (const bgsched::DatabaseException& err) {
+ bridge_handle_database_errors("getJobs",
+ err.getError().toValue());
+ } catch (const bgsched::InternalException& err) {
+ bridge_handle_internal_errors("getJobs",
+ err.getError().toValue());
+ } catch (...) {
+ error("Unknown error from getJobs.");
}
sleep(POLL_INTERVAL);
}
@@ -252,40 +342,43 @@
return SLURM_SUCCESS;
}
-static void _remove_jobs_on_block_and_reset(char *block_id)
+static void _remove_jobs_on_block_and_reset(char *block_id,
+ struct job_record *job_ptr)
{
+ char *mp_str = NULL;
bg_record_t *bg_record = NULL;
int job_remove_failed = 0;
+ slurmctld_lock_t job_read_lock =
+ { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
if (!block_id) {
error("_remove_jobs_on_block_and_reset: no block name given");
return;
}
- if (_block_wait_for_jobs(block_id) != SLURM_SUCCESS)
+ if (_block_wait_for_jobs(block_id, job_ptr) != SLURM_SUCCESS)
job_remove_failed = 1;
/* remove the block's users */
+
+ /* Lock job read before block to avoid
+ * issues where a step could complete after the job completion
+ * has taken place (since we are on a thread here).
+ */
+ if (job_ptr)
+ lock_slurmctld(job_read_lock);
slurm_mutex_lock(&block_state_mutex);
bg_record = find_bg_record_in_list(bg_lists->main, block_id);
if (bg_record) {
- debug("got the record %s user is %s",
- bg_record->bg_block_id,
- bg_record->user_name);
-
if (job_remove_failed) {
if (bg_record->mp_str)
- slurm_drain_nodes(
- bg_record->mp_str,
- (char *)
- "_term_agent: Couldn't remove job",
- slurm_get_slurm_user_id());
+ mp_str = xstrdup(bg_record->mp_str);
else
error("Block %s doesn't have a node list.",
block_id);
}
- bg_reset_block(bg_record);
+ bg_reset_block(bg_record, job_ptr);
} else if (bg_conf->layout_mode == LAYOUT_DYNAMIC) {
debug2("Hopefully we are destroying this block %s "
"since it isn't in the bg_lists->main",
@@ -293,6 +386,25 @@
}
slurm_mutex_unlock(&block_state_mutex);
+ if (job_ptr) {
+ if (job_ptr->magic == JOB_MAGIC) {
+ /* This signals the job purger that the job
+ actually finished in the system.
+ */
+ select_jobinfo_t *jobinfo = (select_jobinfo_t *)
+ job_ptr->select_jobinfo->data;
+ jobinfo->bg_record = NULL;
+ }
+ unlock_slurmctld(job_read_lock);
+ }
+
+ /* avoid locking issues just do this afterwards. */
+ if (mp_str) {
+ slurm_drain_nodes(mp_str,
+ (char *)"_term_agent: Couldn't remove job",
+ slurm_get_slurm_user_id());
+ xfree(mp_str);
+ }
}
@@ -301,15 +413,19 @@
if (initialized)
return 1;
- if (bg_recover == NOT_FROM_CONTROLLER)
- return 0;
-
#ifdef HAVE_BG_FILES
if (!properties_file)
properties_file = (char *)"";
- bgsched::init(properties_file);
+ try {
+ bgsched::init(properties_file);
+ } catch (const bgsched::InitializationException& err) {
+ bridge_handle_init_errors("bgsched::init",
+ err.getError().toValue());
+ fatal("can't init bridge");
+ } catch (...) {
+ fatal("Unknown error from bgsched::init, can't continue");
+ }
#endif
- bridge_status_init();
initialized = true;
return 1;
@@ -331,9 +447,16 @@
#ifdef HAVE_BG_FILES
memset(size, 0, sizeof(int) * SYSTEM_DIMENSIONS);
- Coordinates bgq_size = core::getMachineSize();
- for (int dim=0; dim< SYSTEM_DIMENSIONS; dim++)
- size[dim] = bgq_size[dim];
+ try {
+ Coordinates bgq_size = core::getMachineSize();
+ for (int dim=0; dim< SYSTEM_DIMENSIONS; dim++)
+ size[dim] = bgq_size[dim];
+ } catch (const bgsched::DatabaseException& err) {
+ bridge_handle_database_errors("core::getMachineSize",
+ err.getError().toValue());
+ } catch (...) {
+ error("Unknown error from core::getMachineSize");
+ }
#endif
return SLURM_SUCCESS;
@@ -350,16 +473,15 @@
return SLURM_ERROR;
inited = true;
-#ifdef HAVE_BG_FILES
- ComputeHardware::ConstPtr bgq = getComputeHardware();
- for (int a = 0; a < DIM_SIZE[A]; a++)
- for (int x = 0; x < DIM_SIZE[X]; x++)
- for (int y = 0; y < DIM_SIZE[Y]; y++)
- for (int z = 0; z < DIM_SIZE[Z]; z++)
- _setup_ba_mp(
- bgq, &ba_main_grid[a][x][y][z]);
+ slurm_mutex_lock(&ba_system_mutex);
+ assert(ba_main_grid);
+
+#ifdef HAVE_BG_FILES
+ uint16_t coords[SYSTEM_DIMENSIONS];
+ _setup_ba_mp(0, coords, bridge_get_compute_hardware());
#endif
+ slurm_mutex_unlock(&ba_system_mutex);
return SLURM_SUCCESS;
}
@@ -438,40 +560,68 @@
copy of this pointer we need to go out a get the
real one from the system and use it.
*/
+ slurm_mutex_lock(&ba_system_mutex);
ba_mp = coord2ba_mp(ba_mp->coord);
for (i=0; i<bg_conf->mp_nodecard_cnt; i++) {
if (use_nc[i] && ba_mp)
nodecards.push_back(ba_mp->nodecard_loc[i]);
}
+ slurm_mutex_unlock(&ba_system_mutex);
try {
block_ptr = Block::create(nodecards);
+ rc = SLURM_SUCCESS;
} catch (const bgsched::InputException& err) {
rc = bridge_handle_input_errors(
"Block::createSmallBlock",
err.getError().toValue(),
bg_record);
- if (rc != SLURM_SUCCESS)
- return rc;
+ } catch (const bgsched::RuntimeException& err) {
+ rc = bridge_handle_runtime_errors(
+ "Block::createSmallBlock",
+ err.getError().toValue(),
+ bg_record);
+ } catch (...) {
+ error("Unknown Error from Block::createSmallBlock");
+ rc = SLURM_ERROR;
}
+
} else {
- ListIterator itr = list_iterator_create(bg_record->ba_mp_list);
+ ListIterator itr;
+ ba_mp_t *main_mp, *start_mp;
+
+ /* If we are dealing with meshes we always need to
+ have the first midplane added as the start corner.
+ If we don't the API doesn't know what to do. Since
+ we only need this here we only set it here. It
+ never gets freed since it is just a copy.
+ */
+ slurm_mutex_lock(&ba_system_mutex);
+ start_mp = coord2ba_mp(bg_record->start);
+ assert(start_mp);
+ assert(start_mp->loc);
+ midplanes.push_back(start_mp->loc);
+
+ itr = list_iterator_create(bg_record->ba_mp_list);
while ((ba_mp = (ba_mp_t *)list_next(itr))) {
/* Since the midplane locations aren't set up in the
copy of this pointer we need to go out a get the
real one from the system and use it.
*/
- ba_mp_t *main_mp = coord2ba_mp(ba_mp->coord);
- if (!main_mp)
+ main_mp = coord2ba_mp(ba_mp->coord);
+ /* don't add the start_mp again. */
+ if (!main_mp || (main_mp == start_mp))
continue;
- info("got %s(%s) %d", main_mp->coord_str,
- main_mp->loc, ba_mp->used);
+
+ // info("got %s(%s) %d", main_mp->coord_str,
+ // main_mp->loc, ba_mp->used);
if (ba_mp->used)
midplanes.push_back(main_mp->loc);
else
pt_midplanes.push_back(main_mp->loc);
}
list_iterator_destroy(itr);
+ slurm_mutex_unlock(&ba_system_mutex);
for (dim=Dimension::A; dim<=Dimension::D; dim++) {
switch (bg_record->conn_type[dim]) {
@@ -487,27 +637,64 @@
try {
block_ptr = Block::create(midplanes,
pt_midplanes, conn_type);
+ rc = SLURM_SUCCESS;
} catch (const bgsched::InputException& err) {
rc = bridge_handle_input_errors(
"Block::create",
err.getError().toValue(),
bg_record);
- if (rc != SLURM_SUCCESS) {
- assert(0);
- return rc;
- }
+ } catch (...) {
+		error("Unknown Error from Block::create");
+ rc = SLURM_ERROR;
}
}
- info("block created correctly");
- block_ptr->setName(bg_record->bg_block_id);
- block_ptr->setMicroLoaderImage(bg_record->mloaderimage);
+ if (rc != SLURM_SUCCESS) {
+ /* This is needed because sometimes we
+ get a sub midplane system with not
+ all the hardware there. This way
+ we can try to create blocks on all
+ the hardware and the good ones will
+ work and the bad ones will just be
+ removed after everything is done
+ being created.
+ */
+ if (bg_conf->sub_mp_sys)
+ rc = SLURM_SUCCESS;
+ else if (bg_record->conn_type[0] != SELECT_SMALL)
+ assert(0);
+ return rc;
+ }
+
+ debug("block created correctly");
+ try {
+ block_ptr->setName(bg_record->bg_block_id);
+ } catch (const bgsched::InputException& err) {
+ rc = bridge_handle_input_errors("Block::setName",
+ err.getError().toValue(),
+ bg_record);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+ } catch (...) {
+ error("Unknown error from Block::setName().");
+ rc = SLURM_ERROR;
+ }
+
+ try {
+ block_ptr->setMicroLoaderImage(bg_record->mloaderimage);
+ } catch (const bgsched::InputException& err) {
+ rc = bridge_handle_input_errors("Block::MicroLoaderImage",
+ err.getError().toValue(),
+ bg_record);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+ } catch (...) {
+ error("Unknown error from Block::setMicroLoaderImage().");
+ rc = SLURM_ERROR;
+ }
try {
block_ptr->add("");
- // block_ptr->addUser(bg_record->bg_block_id,
- // bg_record->user_name);
- //info("got past add");
} catch (const bgsched::InputException& err) {
rc = bridge_handle_input_errors("Block::add",
err.getError().toValue(),
@@ -552,6 +739,7 @@
return SLURM_ERROR;
#ifdef HAVE_BG_FILES
+ char *function_name;
/* Lets see if we are connected to the IO. */
try {
uint32_t avail, unavail;
@@ -570,7 +758,7 @@
return rc;
} catch (const bgsched::InternalException& err) {
rc = bridge_handle_internal_errors("Block::checkIOLinksSummary",
- err.getError().toValue());
+ err.getError().toValue());
if (rc != SLURM_SUCCESS)
return rc;
} catch (...) {
@@ -579,51 +767,81 @@
}
try {
- std::vector<std::string> mp_vec;
- if (!Block::isIOConnected(bg_record->bg_block_id, &mp_vec)) {
- error("block %s is not IOConnected, "
+ std::vector<std::string> res_vec;
+#ifdef HAVE_BG_NEW_IO_CHECK
+ std::vector<std::string> unconn_ionode_vec;
+
+ function_name = (char *)"Block::checkIO";
+ Block::checkIO(bg_record->bg_block_id,
+ &unconn_ionode_vec,
+ &res_vec);
+ if (!res_vec.empty()) {
+ error("Block %s is not IOConnected, "
"contact your admin. Midplanes not "
- "connected are ...", bg_record->bg_block_id);
- BOOST_FOREACH(const std::string& mp, mp_vec) {
- error("%s", mp.c_str());
+ "connected ...", bg_record->bg_block_id);
+ slurm_mutex_lock(&ba_system_mutex);
+ BOOST_FOREACH(const std::string& res, res_vec) {
+ ba_mp_t *ba_mp = loc2ba_mp(res.c_str());
+ if (ba_mp)
+ error("%s(%s)",
+ res.c_str(), ba_mp->coord_str);
+ else
+ error("%s", res.c_str());
+ }
+ slurm_mutex_unlock(&ba_system_mutex);
+ return BG_ERROR_NO_IOBLOCK_CONNECTED;
+ }
+#else
+ function_name = (char *)"Block::isIOConnected";
+ if (!Block::isIOConnected(
+ bg_record->bg_block_id, &res_vec)) {
+ error("Using old method, "
+ "block %s is not IOConnected, "
+ "contact your admin. Hardware not "
+ "connected ...", bg_record->bg_block_id);
+ BOOST_FOREACH(const std::string& res, res_vec) {
+ error("%s", res.c_str());
}
return BG_ERROR_NO_IOBLOCK_CONNECTED;
}
+#endif
} catch (const bgsched::DatabaseException& err) {
- rc = bridge_handle_database_errors("Block::isIOConnected",
+ rc = bridge_handle_database_errors(function_name,
err.getError().toValue());
if (rc != SLURM_SUCCESS)
return rc;
} catch (const bgsched::InputException& err) {
- rc = bridge_handle_input_errors("Block::isIOConnected",
+ rc = bridge_handle_input_errors(function_name,
err.getError().toValue(),
bg_record);
if (rc != SLURM_SUCCESS)
return rc;
} catch (const bgsched::InternalException& err) {
- rc = bridge_handle_internal_errors("Block::isIOConnected",
+ rc = bridge_handle_internal_errors(function_name,
err.getError().toValue());
if (rc != SLURM_SUCCESS)
return rc;
} catch (...) {
- error("isIOConnected request failed ... continuing.");
+ error("%s request failed ... continuing.", function_name);
rc = SLURM_ERROR;
}
- if ((rc = bridge_block_remove_all_users(
- bg_record, bg_conf->slurm_user_name)) == REMOVE_USER_ERR) {
+ if ((rc = bridge_block_sync_users(bg_record)) != SLURM_SUCCESS) {
error("bridge_block_remove_all_users: Something "
"happened removing users from block %s",
bg_record->bg_block_id);
return SLURM_ERROR;
- } else if (rc == REMOVE_USER_NONE && bg_conf->slurm_user_name)
- rc = bridge_block_add_user(bg_record, bg_conf->slurm_user_name);
-
- if (rc != SLURM_SUCCESS)
- return SLURM_ERROR;
+ }
try {
+ debug("booting block %s", bg_record->bg_block_id);
Block::initiateBoot(bg_record->bg_block_id);
+ /* Set this here just to make sure we know we
+		   are supposed to be booting. Just in case the
+ block goes free before we notice we are
+ configuring.
+ */
+ bg_record->boot_state = 1;
} catch (const bgsched::RuntimeException& err) {
rc = bridge_handle_runtime_errors("Block::initiateBoot",
err.getError().toValue(),
@@ -645,13 +863,8 @@
error("Boot block request failed ... continuing.");
rc = SLURM_ERROR;
}
- /* Set this here just to make sure we know we are suppose to
- be booting. Just incase the block goes free before we
- notice we are configuring.
- */
- bg_record->boot_state = BG_BLOCK_BOOTING;
#else
- info("block %s is ready", bg_record->bg_block_id);
+ debug("block %s is ready", bg_record->bg_block_id);
if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
list_push(bg_lists->booted, bg_record);
bg_record->state = BG_BLOCK_INITED;
@@ -669,7 +882,7 @@
if (!bg_record || !bg_record->bg_block_id)
return SLURM_ERROR;
- info("freeing block %s", bg_record->bg_block_id);
+ debug("freeing block %s", bg_record->bg_block_id);
#ifdef HAVE_BG_FILES
try {
@@ -697,6 +910,7 @@
}
#else
bg_record->state = BG_BLOCK_FREE;
+ last_bg_update = time(NULL);
#endif
return rc;
}
@@ -710,7 +924,7 @@
if (!bg_record || !bg_record->bg_block_id)
return SLURM_ERROR;
- info("removing block %s %p", bg_record->bg_block_id, bg_record);
+ debug("removing block %s %p", bg_record->bg_block_id, bg_record);
#ifdef HAVE_BG_FILES
try {
@@ -740,7 +954,7 @@
return rc;
}
-extern int bridge_block_add_user(bg_record_t *bg_record, char *user_name)
+extern int bridge_block_add_user(bg_record_t *bg_record, const char *user_name)
{
int rc = SLURM_SUCCESS;
if (!bridge_init(NULL))
@@ -749,7 +963,32 @@
if (!bg_record || !bg_record->bg_block_id || !user_name)
return SLURM_ERROR;
- info("adding user %s to block %s", user_name, bg_record->bg_block_id);
+#ifdef HAVE_BG_FILES
+ try {
+ if (Block::isAuthorized(bg_record->bg_block_id, user_name)) {
+ debug2("User %s is already able to run "
+ "jobs on block %s",
+ user_name, bg_record->bg_block_id);
+ return SLURM_SUCCESS;
+ }
+ } catch (const bgsched::InputException& err) {
+ rc = bridge_handle_input_errors("Block::isAuthorized",
+ err.getError().toValue(),
+ bg_record);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+ } catch (const bgsched::RuntimeException& err) {
+ rc = bridge_handle_runtime_errors("Block::isAuthorized",
+ err.getError().toValue(),
+ bg_record);
+ if (rc != SLURM_SUCCESS)
+ return rc;
+ } catch(...) {
+ error("isAuthorized user request failed ... continuing.");
+ rc = SLURM_ERROR;
+ }
+#endif
+ debug("adding user %s to block %s", user_name, bg_record->bg_block_id);
#ifdef HAVE_BG_FILES
try {
Block::addUser(bg_record->bg_block_id, user_name);
@@ -773,7 +1012,8 @@
return rc;
}
-extern int bridge_block_remove_user(bg_record_t *bg_record, char *user_name)
+extern int bridge_block_remove_user(bg_record_t *bg_record,
+ const char *user_name)
{
int rc = SLURM_SUCCESS;
if (!bridge_init(NULL))
@@ -782,8 +1022,8 @@
if (!bg_record || !bg_record->bg_block_id || !user_name)
return SLURM_ERROR;
- info("removing user %s from block %s",
- user_name, bg_record->bg_block_id);
+ debug("removing user %s from block %s",
+ user_name, bg_record->bg_block_id);
#ifdef HAVE_BG_FILES
try {
Block::removeUser(bg_record->bg_block_id, user_name);
@@ -807,20 +1047,20 @@
return rc;
}
-extern int bridge_block_remove_all_users(bg_record_t *bg_record,
- char *user_name)
+extern int bridge_block_sync_users(bg_record_t *bg_record)
{
int rc = SLURM_SUCCESS;
#ifdef HAVE_BG_FILES
std::vector<std::string> vec;
vector<std::string>::iterator iter;
+ bool found = 0;
#endif
if (!bridge_init(NULL))
- return SLURM_ERROR;
+ return REMOVE_USER_ERR;
if (!bg_record || !bg_record->bg_block_id)
- return SLURM_ERROR;
+ return REMOVE_USER_ERR;
#ifdef HAVE_BG_FILES
try {
@@ -829,23 +1069,70 @@
bridge_handle_input_errors(
"Block::getUsers",
err.getError().toValue(), bg_record);
- return REMOVE_USER_NONE;
+ return REMOVE_USER_ERR;
} catch (const bgsched::RuntimeException& err) {
bridge_handle_runtime_errors(
"Block::getUsers",
err.getError().toValue(), bg_record);
- return REMOVE_USER_NONE;
+ return REMOVE_USER_ERR;
}
- if (vec.empty())
- return REMOVE_USER_NONE;
+ if (bg_record->job_ptr && (bg_record->job_ptr->magic == JOB_MAGIC)) {
+ select_jobinfo_t *jobinfo = (select_jobinfo_t *)
+ bg_record->job_ptr->select_jobinfo->data;
+ BOOST_FOREACH(const std::string& user, vec) {
+ if (!user.compare(bg_conf->slurm_user_name))
+ continue;
+ if (!user.compare(jobinfo->user_name)) {
+ found = 1;
+ continue;
+ }
+ bridge_block_remove_user(bg_record, user.c_str());
+ }
+ if (!found)
+ bridge_block_add_user(bg_record,
+ jobinfo->user_name);
+ } else if (bg_record->job_list && list_count(bg_record->job_list)) {
+ ListIterator itr = list_iterator_create(bg_record->job_list);
+ struct job_record *job_ptr = NULL;
- BOOST_FOREACH(const std::string& user, vec) {
- if (user_name && (user == user_name))
- continue;
- if ((rc = bridge_block_remove_user(bg_record, user_name)
- != SLURM_SUCCESS))
- break;
+ /* First add all that need to be added removing the
+ * name from the vector as we go.
+ */
+ while ((job_ptr = (struct job_record *)list_next(itr))) {
+ select_jobinfo_t *jobinfo;
+
+ if (job_ptr->magic != JOB_MAGIC) {
+ error("bridge_block_sync_users: "
+ "bad magic found when "
+ "looking at block %s",
+ bg_record->bg_block_id);
+ list_delete_item(itr);
+ continue;
+ }
+
+ jobinfo = (select_jobinfo_t *)
+ job_ptr->select_jobinfo->data;
+ iter = std::find(vec.begin(), vec.end(),
+ jobinfo->user_name);
+ if (iter == vec.end())
+ bridge_block_add_user(bg_record,
+ jobinfo->user_name);
+ else
+ vec.erase(iter);
+ }
+ list_iterator_destroy(itr);
+
+ /* Then remove all that is left */
+ BOOST_FOREACH(const std::string& user, vec) {
+ bridge_block_remove_user(bg_record, user.c_str());
+ }
+ } else {
+ BOOST_FOREACH(const std::string& user, vec) {
+ if (!user.compare(bg_conf->slurm_user_name))
+ continue;
+ bridge_block_remove_user(bg_record, user.c_str());
+ }
}
#endif
@@ -858,7 +1145,6 @@
#ifdef HAVE_BG_FILES
Block::Ptrs vec;
BlockFilter filter;
- uid_t my_uid;
bg_record_t *bg_record = NULL;
info("querying the system for existing blocks");
@@ -866,7 +1152,7 @@
/* Get the midplane info */
filter.setExtendedInfo(true);
- vec = getBlocks(filter, BlockSort::AnyOrder);
+ vec = bridge_get_blocks(filter);
if (vec.empty()) {
debug("No blocks in the current system");
return SLURM_SUCCESS;
@@ -889,7 +1175,13 @@
bg_record = _translate_object_to_block(block_ptr);
slurm_list_append(curr_block_list, bg_record);
}
+
+	/* modifying will be cleared later in the
+ _validate_config_blocks or _delete_old_blocks
+ functions in select_bluegene.c
+ */
bg_record->modifying = 1;
+
/* If we are in error we really just want to get the
new state.
*/
@@ -913,31 +1205,9 @@
if (!bg_recover)
continue;
+ xfree(bg_record->mloaderimage);
bg_record->mloaderimage =
xstrdup(block_ptr->getMicroLoaderImage().c_str());
-
-
- /* If a user is on the block this will be filled in */
- xfree(bg_record->user_name);
- xfree(bg_record->target_name);
- if (block_ptr->getUser() != "")
- bg_record->user_name =
- xstrdup(block_ptr->getUser().c_str());
-
- if (!bg_record->user_name)
- bg_record->user_name =
- xstrdup(bg_conf->slurm_user_name);
-
- if (!bg_record->boot_state)
- bg_record->target_name =
- xstrdup(bg_conf->slurm_user_name);
- else
- bg_record->target_name = xstrdup(bg_record->user_name);
-
- if (uid_from_string(bg_record->user_name, &my_uid) < 0)
- error("uid_from_string(%s): %m", bg_record->user_name);
- else
- bg_record->user_uid = my_uid;
}
slurm_mutex_unlock(&block_state_mutex);
@@ -946,26 +1216,54 @@
return rc;
}
-extern void bridge_reset_block_list(List block_list)
+extern void bridge_block_post_job(char *bg_block_id,
+ struct job_record *job_ptr)
{
- ListIterator itr = NULL;
- bg_record_t *bg_record = NULL;
-
- if (!block_list)
- return;
-
- itr = list_iterator_create(block_list);
- while ((bg_record = (bg_record_t *)list_next(itr))) {
- info("Queue clearing of users of BG block %s",
- bg_record->bg_block_id);
- _remove_jobs_on_block_and_reset(bg_record->bg_block_id);
- }
- list_iterator_destroy(itr);
+ _remove_jobs_on_block_and_reset(bg_block_id, job_ptr);
}
-extern void bridge_block_post_job(char *bg_block_id)
+
+extern uint16_t bridge_block_get_action(char *bg_block_id)
{
- _remove_jobs_on_block_and_reset(bg_block_id);
+ uint16_t action = BG_BLOCK_ACTION_NONE;
+
+#if defined HAVE_BG_FILES && defined HAVE_BG_GET_ACTION
+ BlockFilter filter;
+ Block::Ptrs vec;
+
+ /* This block hasn't been created yet. */
+ if (!bg_block_id)
+ return action;
+
+ filter.setName(string(bg_block_id));
+
+ vec = bridge_get_blocks(filter);
+ if (vec.empty()) {
+ error("bridge_block_get_action: "
+ "block %s not found, this should never happen",
+ bg_block_id);
+ /* block is gone? */
+ return BG_BLOCK_ACTION_NAV;
+ }
+
+ const Block::Ptr &block_ptr = *(vec.begin());
+ action = bridge_translate_action(block_ptr->getAction().toValue());
+#endif
+ return action;
+}
+
+extern int bridge_check_nodeboards(char *mp_loc)
+{
+#ifdef HAVE_BG_FILES
+ NodeBoard::ConstPtrs vec = bridge_get_nodeboards(mp_loc);
+
+ BOOST_FOREACH(const NodeBoard::ConstPtr &nb_ptr, vec) {
+ if (!nb_ptr->isMetaState()
+ && (nb_ptr->getState() != Hardware::Available))
+ return 1;
+ }
+#endif
+ return 0;
}
extern int bridge_set_log_params(char *api_file_name, unsigned int level)
@@ -1025,41 +1323,6 @@
logger_ptr->setLevel(level_ptr);
// Add the appender to the ibm logger.
logger_ptr->addAppender(appender_ptr);
-
- // for (int i=1; i<7; i++) {
- // switch (i) {
- // case 0:
- // level_ptr = log4cxx::Level::getOff();
- // break;
- // case 1:
- // level_ptr = log4cxx::Level::getFatal();
- // break;
- // case 2:
- // level_ptr = log4cxx::Level::getError();
- // break;
- // case 3:
- // level_ptr = log4cxx::Level::getWarn();
- // break;
- // case 4:
- // level_ptr = log4cxx::Level::getInfo();
- // break;
- // case 5:
- // level_ptr = log4cxx::Level::getDebug();
- // break;
- // case 6:
- // level_ptr = log4cxx::Level::getTrace();
- // break;
- // case 7:
- // level_ptr = log4cxx::Level::getAll();
- // break;
- // default:
- // level_ptr = log4cxx::Level::getDebug();
- // break;
- // }
- // if (logger_ptr->isEnabledFor(level_ptr))
- // info("we are doing %d", i);
- // }
-
#endif
return SLURM_SUCCESS;
}
diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_status.cc b/src/plugins/select/bluegene/bl_bgq/bridge_status.cc
index 0511dd7..4b0d4c8 100644
--- a/src/plugins/select/bluegene/bl_bgq/bridge_status.cc
+++ b/src/plugins/select/bluegene/bl_bgq/bridge_status.cc
@@ -71,15 +71,31 @@
#if defined HAVE_BG_FILES
+static bool initial_poll = true;
+static bool rt_running = false;
+static bool rt_waiting = false;
+
/*
* Handle compute block status changes as a result of a block allocate.
*/
typedef class event_handler: public bgsched::realtime::ClientEventListener {
public:
/*
+ * Handle a real-time started event.
+ */
+ virtual void handleRealtimeStartedRealtimeEvent(
+ const RealtimeStartedEventInfo& event);
+
+ /*
+ * Handle a real-time ended event.
+ */
+ virtual void handleRealtimeEndedRealtimeEvent(
+ const RealtimeEndedEventInfo& event);
+
+ /*
* Handle a block state changed real-time event.
*/
- void handleBlockStateChangedRealtimeEvent(
+ virtual void handleBlockStateChangedRealtimeEvent(
const BlockStateChangedEventInfo& event);
/*
@@ -100,22 +116,66 @@
virtual void handleNodeBoardStateChangedRealtimeEvent(
const NodeBoardStateChangedEventInfo& event);
- // /*
- // * Handle a cable state changed real-time event.
- // */
- // virtual void handleCableStateChangedRealtimeEvent(
- // const CableStateChangedEventInfo& event);
+ /*
+ * Handle a cnode state changed real-time event.
+ */
+ virtual void handleNodeStateChangedRealtimeEvent(
+ const NodeStateChangedEventInfo& event);
+
+ /*
+ * Handle a cable state changed real-time event.
+ */
+ virtual void handleTorusCableStateChangedRealtimeEvent(
+ const TorusCableStateChangedEventInfo& event);
} event_handler_t;
static List kill_job_list = NULL;
+static pthread_t before_rt_thread;
static pthread_t real_time_thread;
static pthread_t poll_thread;
+static pthread_t action_poll_thread;
static bgsched::realtime::Client *rt_client_ptr = NULL;
pthread_mutex_t rt_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t get_hardware_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* rt_mutex must be locked before calling this. */
+static void _bridge_status_disconnect()
+{
+ try {
+ rt_client_ptr->disconnect();
+ } catch (bgsched::realtime::InternalErrorException& err) {
+ bridge_handle_realtime_internal_errors(
+ "realtime::disconnect", err.getError().toValue());
+ } catch (...) {
+ error("Unknown error from realtime::disconnect");
+ }
+}
+
+/* ba_system_mutex && block_state_mutex must be unlocked before this */
+static void _handle_bad_midplane(char *bg_down_node,
+ EnumWrapper<Hardware::State> state,
+ bool print_debug)
+{
+ assert(bg_down_node);
+
+ if (!node_already_down(bg_down_node)) {
+ if (print_debug
+ && !(bg_conf->slurm_debug_flags & DEBUG_FLAG_NO_REALTIME))
+ error("Midplane %s, state went to '%s', "
+ "marking midplane down.",
+ bg_down_node,
+ bridge_hardware_state_string(state.toValue()));
+ slurm_drain_nodes(
+ bg_down_node,
+ (char *)"select_bluegene: MMCS midplane not UP",
+ slurm_get_slurm_user_id());
+ }
+}
static void _handle_bad_switch(int dim, const char *mp_coords,
- EnumWrapper<Hardware::State> state)
+ EnumWrapper<Hardware::State> state,
+ bool block_state_locked, bool print_debug)
{
char bg_down_node[128];
@@ -125,23 +185,37 @@
bg_conf->slurm_node_prefix, mp_coords);
if (!node_already_down(bg_down_node)) {
- error("Switch at dim '%d' on Midplane %s, state went to %d, "
- "marking midplane down.",
- dim, bg_down_node, state.toValue());
+ if (print_debug
+ && !(bg_conf->slurm_debug_flags & DEBUG_FLAG_NO_REALTIME))
+ error("Switch at dim '%d' on Midplane %s, "
+ "state went to '%s', marking midplane down.",
+ dim, bg_down_node,
+ bridge_hardware_state_string(state.toValue()));
+ /* unlock mutex here since slurm_drain_nodes could produce
+ deadlock */
+ slurm_mutex_unlock(&ba_system_mutex);
+ if (block_state_locked)
+ slurm_mutex_unlock(&block_state_mutex);
slurm_drain_nodes(bg_down_node,
(char *)"select_bluegene: MMCS switch not UP",
slurm_get_slurm_user_id());
+ if (block_state_locked)
+ slurm_mutex_lock(&block_state_mutex);
+ slurm_mutex_lock(&ba_system_mutex);
}
}
-static void _handle_bad_nodeboard(const char *nb_name, const char* mp_coords,
- EnumWrapper<Hardware::State> state)
+/* job_read_lock && ba_system_mutex && block_state_mutex must be
+ * unlocked before this */
+static void _handle_bad_nodeboard(const char *nb_name, char* bg_down_node,
+ EnumWrapper<Hardware::State> state,
+ char *reason, bool print_debug)
{
- char bg_down_node[128];
int io_start;
+ int rc;
assert(nb_name);
- assert(mp_coords);
+ assert(bg_down_node);
/* From the first nodecard id we can figure
out where to start from with the alloc of ionodes.
@@ -160,7 +234,7 @@
nb_name);
else
error("We don't have the system configured "
- "for this nodecard %s, we only have "
+ "for this nodeboard %s, we only have "
"%d ionodes and this starts at %d",
nb_name, bg_conf->ionodes_per_mp, io_start);
return;
@@ -174,115 +248,394 @@
/* we have to handle each nodecard separately to make
sure we don't create holes in the system */
- snprintf(bg_down_node, sizeof(bg_down_node), "%s%s",
- bg_conf->slurm_node_prefix, mp_coords);
- if (down_nodecard(bg_down_node, io_start, 0) == SLURM_SUCCESS)
- debug("nodeboard %s on %s is in an error state (%d)",
- nb_name, bg_down_node, state.toValue());
- else
- debug2("nodeboard %s on %s is in an error state (%d), "
- "but error was returned when trying to make it so",
- nb_name, bg_down_node, state.toValue());
+ rc = down_nodecard(bg_down_node, io_start, 0, reason);
+
+ if (print_debug
+ && !(bg_conf->slurm_debug_flags & DEBUG_FLAG_NO_REALTIME)) {
+ if (rc == SLURM_SUCCESS)
+ debug("nodeboard %s on %s is in an error state '%s'",
+ nb_name, bg_down_node,
+ bridge_hardware_state_string(state.toValue()));
+ else
+ debug2("nodeboard %s on %s is in an error state '%s', "
+ "but error was returned when trying to make "
+ "it so",
+ nb_name, bg_down_node,
+ bridge_hardware_state_string(state.toValue()));
+ }
return;
}
-void event_handler::handleBlockStateChangedRealtimeEvent(
- const BlockStateChangedEventInfo& event)
+/* ba_system_mutex && block_state_mutex must be locked before this */
+static void _handle_node_change(ba_mp_t *ba_mp, const std::string& cnode_loc,
+ EnumWrapper<Hardware::State> state,
+ List *delete_list, bool print_debug)
{
- bg_record_t *bg_record = NULL;
- const char *bg_block_id = event.getBlockName().c_str();
+ Coordinates ibm_cnode_coords = getNodeMidplaneCoordinates(cnode_loc);
+ uint16_t cnode_coords[Dimension::NodeDims];
+ int inx, set, changed = 0;
+ uint16_t dim;
+ bg_record_t *bg_record;
+ ba_mp_t *found_ba_mp;
+ ListIterator itr, itr2;
+ select_nodeinfo_t *nodeinfo;
+ struct node_record *node_ptr = NULL;
- if (!bg_lists->main)
+ /* This will be handled on the initial poll only */
+ if (!initial_poll && bg_conf->sub_mp_sys
+ && (state == Hardware::Missing))
return;
- slurm_mutex_lock(&block_state_mutex);
- bg_record = find_bg_record_in_list(bg_lists->main, bg_block_id);
- if (!bg_record) {
- slurm_mutex_unlock(&block_state_mutex);
- info("bridge_status: bg_record %s isn't in the main list",
- bg_block_id);
+ if (!ba_mp->cnode_err_bitmap)
+ ba_mp->cnode_err_bitmap = bit_alloc(bg_conf->mp_cnode_cnt);
+
+ for (dim = 0; dim < Dimension::NodeDims; dim++)
+ cnode_coords[dim] = ibm_cnode_coords[dim];
+
+ inx = ba_node_xlate_to_1d(cnode_coords, ba_mp_geo_system);
+ if (inx >= bit_size(ba_mp->cnode_err_bitmap)) {
+ error("trying to set cnode %d but we only have %d",
+ inx, bit_size(ba_mp->cnode_err_bitmap));
return;
}
- bg_status_update_block_state(bg_record,
- bridge_translate_status(event.getStatus()),
- kill_job_list);
+ node_ptr = &(node_record_table_ptr[ba_mp->index]);
+ set = bit_test(ba_mp->cnode_err_bitmap, inx);
+ if (bg_conf->sub_mp_sys && (state == Hardware::Missing)) {
+ struct part_record *part_ptr;
+ /* If Missing we are just going to throw any block
+ away so don't set the err bitmap. Remove the
+ hardware from the system instead. */
+ if (node_ptr->cpus >= bg_conf->cpu_ratio)
+ node_ptr->cpus -= bg_conf->cpu_ratio;
+ if (node_ptr->sockets)
+ node_ptr->sockets--;
+ if (node_ptr->real_memory >= 16384)
+ node_ptr->real_memory -= 16384;
- slurm_mutex_unlock(&block_state_mutex);
+ if (bg_conf->actual_cnodes_per_mp)
+ bg_conf->actual_cnodes_per_mp--;
+ itr = list_iterator_create(part_list);
+ while ((part_ptr = (struct part_record *)list_next(itr))) {
+ if (!bit_test(part_ptr->node_bitmap, ba_mp->index))
+ continue;
+ if (part_ptr->total_cpus >= bg_conf->cpu_ratio)
+ part_ptr->total_cpus -= bg_conf->cpu_ratio;
+ }
+ list_iterator_destroy(itr);
- bg_status_process_kill_job_list(kill_job_list);
+ changed = 1;
+ } else if (state != Hardware::Available) {
+ if (!set) {
+ bit_set(ba_mp->cnode_err_bitmap, inx);
+ changed = 1;
+ }
+ } else if (set) {
+ bit_clear(ba_mp->cnode_err_bitmap, inx);
+ changed = 1;
+ }
+ /* If the state is error this could happen after a software
+ error and thus mean it wasn't changed so we need to handle
+ it no matter what.
+ */
+ if (state == Hardware::Error) {
+ int nc_loc = ba_translate_coord2nc(cnode_coords);
+ char nc_name[10];
+ char reason[255];
+ char bg_down_node[128];
+
+ snprintf(bg_down_node, sizeof(bg_down_node), "%s%s",
+ bg_conf->slurm_node_prefix, ba_mp->coord_str);
+ snprintf(nc_name, sizeof(nc_name), "N%d", nc_loc);
+ snprintf(reason, sizeof(reason),
+ "_handle_node_change: On midplane %s nodeboard %s "
+ "had cnode %u%u%u%u%u(%s) got into an error state.",
+ bg_down_node,
+ nc_name,
+ cnode_coords[0],
+ cnode_coords[1],
+ cnode_coords[2],
+ cnode_coords[3],
+ cnode_coords[4],
+ cnode_loc.c_str());
+ if (print_debug
+ && !(bg_conf->slurm_debug_flags & DEBUG_FLAG_NO_REALTIME))
+ error("%s", reason);
+ /* unlock mutex here since _handle_bad_nodeboard could produce
+ deadlock */
+ slurm_mutex_unlock(&ba_system_mutex);
+ slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
+ _handle_bad_nodeboard(nc_name, bg_down_node,
+ state, reason, print_debug);
+ lock_slurmctld(job_read_lock);
+ slurm_mutex_lock(&block_state_mutex);
+ slurm_mutex_lock(&ba_system_mutex);
+ }
+
+ if (!changed)
+ return;
+ last_bg_update = time(NULL);
+ if (print_debug
+ && !(bg_conf->slurm_debug_flags & DEBUG_FLAG_NO_REALTIME))
+ info("_handle_node_change: state for %s - %s is '%s'",
+ ba_mp->coord_str, cnode_loc.c_str(),
+ bridge_hardware_state_string(state.toValue()));
+
+ assert(node_ptr->select_nodeinfo);
+ nodeinfo = (select_nodeinfo_t *)node_ptr->select_nodeinfo->data;
+ assert(nodeinfo);
+ xfree(nodeinfo->failed_cnodes);
+ nodeinfo->failed_cnodes = ba_node_map_ranged_hostlist(
+ ba_mp->cnode_err_bitmap, ba_mp_geo_system);
+ itr = list_iterator_create(bg_lists->main);
+ while ((bg_record = (bg_record_t *)list_next(itr))) {
+ /* if a block has a free_cnt we still need to apply this */
+ if (!bit_test(bg_record->mp_bitmap, ba_mp->index))
+ continue;
+ itr2 = list_iterator_create(bg_record->ba_mp_list);
+ while ((found_ba_mp = (ba_mp_t *)list_next(itr2))) {
+ float err_ratio;
+ struct job_record *job_ptr = NULL;
+
+ if (found_ba_mp->index != ba_mp->index)
+ continue;
+ if (!found_ba_mp->used)
+ continue;
+ /* perhaps this block isn't involved in this
+ error */
+ if (found_ba_mp->cnode_usable_bitmap) {
+ if (bit_test(found_ba_mp->cnode_usable_bitmap,
+ inx))
+ continue;
+ }
+
+ if (bg_conf->sub_mp_sys
+ && (state == Hardware::Missing)) {
+ if (!*delete_list)
+ *delete_list = list_create(NULL);
+ debug("Removing block %s, "
+ "it has missing cnodes",
+ bg_record->bg_block_id);
+ /* If we don't have any mp_counts
+ * force block removal */
+ bg_record->mp_count = 0;
+ list_push(*delete_list, bg_record);
+ break;
+ }
+
+ if (!found_ba_mp->cnode_err_bitmap)
+ found_ba_mp->cnode_err_bitmap =
+ bit_alloc(bg_conf->mp_cnode_cnt);
+
+ if (state != Hardware::Available) {
+ bit_set(found_ba_mp->cnode_err_bitmap, inx);
+ bg_record->cnode_err_cnt++;
+ } else if (set) {
+ bit_clear(found_ba_mp->cnode_err_bitmap, inx);
+ if (bg_record->cnode_err_cnt)
+ bg_record->cnode_err_cnt--;
+ }
+
+ err_ratio = (float)bg_record->cnode_err_cnt
+ / (float)bg_record->cnode_cnt;
+ bg_record->err_ratio = err_ratio * 100;
+
+ /* handle really small ratios */
+ if (!bg_record->err_ratio && bg_record->cnode_err_cnt)
+ bg_record->err_ratio = 1;
+
+ if (print_debug
+ && !(bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_NO_REALTIME))
+ debug("count in error for %s is %u "
+ "with ratio at %u",
+ bg_record->bg_block_id,
+ bg_record->cnode_err_cnt,
+ bg_record->err_ratio);
+
+ /* If the state is available no reason to go
+ * kill jobs so just break out here instead.
+ *
+ * Also if we already issued a free on this
+			 * block there could have been a new job added
+ * that is waiting for the block to be freed
+ * so don't go around and fail it before it starts.
+ */
+ if (state == Hardware::Available || bg_record->free_cnt)
+ break;
+
+ if (bg_record->job_ptr)
+ job_ptr = bg_record->job_ptr;
+ else if (bg_record->job_list
+ && list_count(bg_record->job_list)) {
+ ListIterator job_itr = list_iterator_create(
+ bg_record->job_list);
+ while ((job_ptr = (struct job_record *)
+ list_next(job_itr))) {
+ select_jobinfo_t *jobinfo =
+ (select_jobinfo_t *)
+ job_ptr->select_jobinfo->data;
+ /* If no units_avail we are
+ using the whole thing, else
+ check the index.
+ */
+ if (!jobinfo->units_avail
+ || bit_test(jobinfo->units_avail,
+ inx))
+ break;
+ }
+ list_iterator_destroy(job_itr);
+ } else {
+ if (!*delete_list)
+ *delete_list = list_create(NULL);
+ /* If there are no jobs running just
+ free the thing. (This rarely
+ happens when a mmcs job goes into
+ error right after it finishes.
+ Weird, I know.) Here we are going
+				   to just remove the block since
+				   otherwise we could try to free this
+ block over and over again, which
+ only needs to happen once.
+ */
+ if (!block_ptr_exist_in_list(
+ *delete_list, bg_record)) {
+ debug("_handle_node_change: going to "
+ "remove block %s, bad hardware "
+ "and no jobs running",
+ bg_record->bg_block_id);
+ list_push(*delete_list, bg_record);
+ }
+ }
+
+ /* block_state_mutex is locked so handle this later */
+ if (job_ptr && job_ptr->kill_on_node_fail) {
+ kill_job_struct_t *freeit = NULL;
+ ListIterator kill_job_itr =
+ list_iterator_create(kill_job_list);
+ /* Since lots of cnodes could fail at
+ the same time affecting the same
+ job make sure we only add it once
+ since there is no reason to do the
+ same process over and over again.
+ */
+ while ((freeit = (kill_job_struct_t *)
+ list_next(kill_job_itr))) {
+ if (freeit->jobid == job_ptr->job_id)
+ break;
+ }
+ list_iterator_destroy(kill_job_itr);
+
+ if (!freeit) {
+ freeit = (kill_job_struct_t *)
+ xmalloc(sizeof(*freeit));
+ freeit->jobid = job_ptr->job_id;
+ list_push(kill_job_list, freeit);
+ }
+ }
+
+ break;
+ }
+ list_iterator_destroy(itr2);
+ }
+ list_iterator_destroy(itr);
+}
+
+static void _handle_cable_change(int dim, ba_mp_t *ba_mp,
+ EnumWrapper<Hardware::State> state,
+ List *delete_list, bool print_debug)
+{
+ select_nodeinfo_t *nodeinfo;
+ struct node_record *node_ptr = NULL;
+ char reason[200];
+ ba_mp_t *next_ba_mp = NULL;
+
+ if (state == Hardware::Available) {
+ /* no change */
+ if (!(ba_mp->axis_switch[dim].usage & BG_SWITCH_CABLE_ERROR))
+ return;
+ next_ba_mp = ba_mp->next_mp[dim];
+
+ node_ptr = &(node_record_table_ptr[ba_mp->index]);
+ assert(node_ptr->select_nodeinfo);
+ nodeinfo = (select_nodeinfo_t *)node_ptr->select_nodeinfo->data;
+ assert(nodeinfo);
+
+ ba_mp->axis_switch[dim].usage &= (~BG_SWITCH_CABLE_ERROR_FULL);
+ if (print_debug
+ && !(bg_conf->slurm_debug_flags & DEBUG_FLAG_NO_REALTIME))
+ info("Cable in dim '%u' on Midplane %s, "
+ "has returned to service",
+ dim, ba_mp->coord_str);
+ /* Don't resume any blocks in the error, Admins will
+ do this when they make sure it is ready. Really
+ only matters for static blocks. On a dynamic
+ system no block will be left around if a cable is bad.
+ */
+ snprintf(reason, sizeof(reason),
+ "Cable going from %s -> %s (%d) is not available.\n",
+ ba_mp->coord_str, next_ba_mp->coord_str, dim);
+
+ xstrsubstitute(nodeinfo->extra_info, reason, NULL);
+ if (nodeinfo->extra_info && !strlen(nodeinfo->extra_info))
+ xfree(nodeinfo->extra_info);
+
+ } else if (!(ba_mp->axis_switch[dim].usage & BG_SWITCH_CABLE_ERROR)) {
+ bg_record_t *bg_record = NULL;
+ ListIterator itr;
+
+ next_ba_mp = ba_mp->next_mp[dim];
+
+ node_ptr = &(node_record_table_ptr[ba_mp->index]);
+ assert(node_ptr->select_nodeinfo);
+ nodeinfo = (select_nodeinfo_t *)node_ptr->select_nodeinfo->data;
+ assert(nodeinfo);
+
+ ba_mp->axis_switch[dim].usage |= BG_SWITCH_CABLE_ERROR_FULL;
+
+ if (print_debug
+ && !(bg_conf->slurm_debug_flags & DEBUG_FLAG_NO_REALTIME))
+ error("Cable at dim '%d' on Midplane %s, "
+ "state went to '%s', marking cable down.",
+ dim, ba_mp->coord_str,
+ bridge_hardware_state_string(state.toValue()));
+
+ snprintf(reason, sizeof(reason),
+ "Cable going from %s -> %s (%d) is not available.\n",
+ ba_mp->coord_str, next_ba_mp->coord_str, dim);
+ if (nodeinfo->extra_info) {
+ if (!strstr(nodeinfo->extra_info, reason))
+ xstrcat(nodeinfo->extra_info, reason);
+ } else
+ nodeinfo->extra_info = xstrdup(reason);
+
+ /* Now handle potential overlapping blocks. */
+ itr = list_iterator_create(bg_lists->main);
+ while ((bg_record = (bg_record_t *)list_next(itr))) {
+ if (bg_record->destroy)
+ continue;
+ if (bg_record->mp_count == 1)
+ continue;
+ if (!bit_test(bg_record->mp_bitmap, ba_mp->index))
+ continue;
+ if (!bit_test(bg_record->mp_bitmap, next_ba_mp->index))
+ continue;
+ if (!*delete_list)
+ *delete_list = list_create(NULL);
+
+ debug("_handle_cable_change: going to "
+ "remove block %s, bad underlying cable.",
+ bg_record->bg_block_id);
+ list_push(*delete_list, bg_record);
+ }
+ list_iterator_destroy(itr);
+ }
last_bg_update = time(NULL);
}
-void event_handler::handleMidplaneStateChangedRealtimeEvent(
- const MidplaneStateChangedEventInfo& event)
-{
-// const char *midplane = event.getMidplaneId().c_str();
-
-}
-
-void event_handler::handleSwitchStateChangedRealtimeEvent(
- const SwitchStateChangedEventInfo& event)
-{
- const char *mp_name = event.getMidplaneLocation().c_str();
- int dim = event.getDimension();
- ba_mp_t *ba_mp = loc2ba_mp(mp_name);
-
- if (!ba_mp) {
- error("Switch in dim '%d' on Midplane %s, state "
- "went from %d to %d, but is not in our system",
- dim, mp_name,
- event.getPreviousState(),
- event.getState());
- }
-
- if (event.getState() == Hardware::Available) {
- /* Don't do anything, wait for admin to fix things,
- * just note things are better. */
-
- info("Switch in dim '%u' on Midplane %s, "
- "has returned to service",
- dim, mp_name);
- return;
- }
-
- /* Else mark the midplane down */
- _handle_bad_switch(dim, ba_mp->coord_str, event.getState());
-
- return;
-}
-
-void event_handler::handleNodeBoardStateChangedRealtimeEvent(
- const NodeBoardStateChangedEventInfo& event)
-{
- const char *mp_name = event.getLocation().substr(0,6).c_str();
- const char *nb_name = event.getLocation().substr(7,3).c_str();
- ba_mp_t *ba_mp = loc2ba_mp(mp_name);
-
- if (!ba_mp) {
- error("Nodeboard '%s' on Midplane %s, state went from %d to %d,"
- "but is not in our system",
- nb_name, mp_name,
- event.getPreviousState(),
- event.getState());
- }
-
- if (event.getState() == Hardware::Available) {
- /* Don't do anything, wait for admin to fix things,
- * just note things are better. */
-
- info("Nodeboard '%s' on Midplane %s(%s), "
- "has returned to service",
- nb_name, mp_name, ba_mp->coord_str);
- return;
- }
-
- _handle_bad_nodeboard(nb_name, ba_mp->coord_str, event.getState());
-
- return;
-}
static int _real_time_connect(void)
{
@@ -314,19 +667,20 @@
Filter::BlockStatuses block_statuses;
Filter rt_filter(Filter::createNone());
+ rt_filter.setNodes(true);
rt_filter.setNodeBoards(true);
rt_filter.setSwitches(true);
rt_filter.setBlocks(true);
+ rt_filter.setMidplanes(true);
+ rt_filter.setTorusCables(true);
+
block_statuses.insert(Block::Free);
block_statuses.insert(Block::Booting);
block_statuses.insert(Block::Initialized);
block_statuses.insert(Block::Terminating);
rt_filter.setBlockStatuses(&block_statuses);
- // rt_filter.get().setMidplanes(true);
- // rt_filter.get().setCables(true);
-
rt_client_ptr->addListener(event_hand);
rc = _real_time_connect();
@@ -335,18 +689,44 @@
bgsched::realtime::Filter::Id filter_id; // Assigned filter id
slurm_mutex_lock(&rt_mutex);
+ rt_running = 1;
+
if (!bridge_status_inited) {
+ rt_running = 0;
slurm_mutex_unlock(&rt_mutex);
break;
}
if (rc == SLURM_SUCCESS) {
- rt_client_ptr->setFilter(rt_filter, &filter_id, NULL);
- rt_client_ptr->requestUpdates(NULL);
- rt_client_ptr->receiveMessages(NULL, NULL, &failed);
+ /* receiveMessages will set this to false if
+ all is well. Otherwise we did fail.
+ */
+ failed = true;
+ try {
+ rt_client_ptr->setFilter(rt_filter, &filter_id,
+ NULL);
+ rt_client_ptr->requestUpdates(NULL);
+ rt_client_ptr->receiveMessages(NULL, NULL,
+ &failed);
+ } catch (bgsched::realtime::ClientStateException& err) {
+ bridge_handle_input_errors(
+ "RealTime Setup",
+ err.getError().toValue(), NULL);
+ } catch (bgsched::realtime::ConnectionException& err) {
+ bridge_handle_input_errors(
+ "RealTime Setup",
+ err.getError().toValue(), NULL);
+ } catch (bgsched::realtime::ProtocolException& err) {
+ bridge_handle_input_errors(
+ "RealTime Setup",
+ err.getError().toValue(), NULL);
+ } catch (...) {
+ error("RealTime Setup: Unknown error thrown?");
+ }
} else
failed = true;
+ rt_running = 0;
slurm_mutex_unlock(&rt_mutex);
if (bridge_status_inited && failed) {
@@ -364,7 +744,6 @@
static void _do_block_poll(void)
{
-#if defined HAVE_BG_FILES
bg_record_t *bg_record;
ListIterator itr;
int updated = 0;
@@ -372,6 +751,9 @@
if (!bg_lists->main)
return;
+ /* Always lock the slurmctld before locking the
+ * block_state_mutex to avoid deadlock. */
+ lock_slurmctld(job_read_lock);
slurm_mutex_lock(&block_state_mutex);
itr = list_iterator_create(bg_lists->main);
while ((bg_record = (bg_record_t *) list_next(itr))) {
@@ -384,12 +766,11 @@
filter.setName(string(bg_record->bg_block_id));
- vec = getBlocks(filter, BlockSort::AnyOrder);
+ vec = bridge_get_blocks(filter);
if (vec.empty()) {
debug("block %s not found, removing "
"from slurm", bg_record->bg_block_id);
- list_remove(itr);
- destroy_bg_record(bg_record);
+ list_delete_item(itr);
continue;
}
const Block::Ptr &block_ptr = *(vec.begin());
@@ -400,63 +781,205 @@
block_ptr->getStatus().toValue()),
kill_job_list))
updated = 1;
+ if (rt_waiting || slurmctld_config.shutdown_time)
+ break;
}
slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
- bg_status_process_kill_job_list(kill_job_list);
+ bg_status_process_kill_job_list(kill_job_list, 0);
if (updated == 1)
last_bg_update = time(NULL);
-
-#endif
}
-static void _handle_midplane_update(ComputeHardware::ConstPtr bgq,
- ba_mp_t *ba_mp)
+static void _do_block_action_poll(void)
{
- Coordinates::Coordinates coords(ba_mp->coord[A], ba_mp->coord[X],
- ba_mp->coord[Y], ba_mp->coord[Z]);
- Midplane::ConstPtr mp_ptr = bgq->getMidplane(coords);
+ bg_record_t *bg_record;
+ ListIterator itr;
+
+ if (!bg_lists->main)
+ return;
+
+ slurm_mutex_lock(&block_state_mutex);
+ itr = list_iterator_create(bg_lists->main);
+ while ((bg_record = (bg_record_t *) list_next(itr))) {
+ if ((bg_record->magic != BLOCK_MAGIC)
+ || !bg_record->bg_block_id)
+ continue;
+
+ bg_record->action =
+ bridge_block_get_action(bg_record->bg_block_id);
+
+ if (slurmctld_config.shutdown_time)
+ break;
+ }
+ list_iterator_destroy(itr);
+ slurm_mutex_unlock(&block_state_mutex);
+}
+
+/* Even though ba_mp should be coming from the main list
+ * ba_system_mutex && block_state_mutex must be unlocked before
+ * this. Anywhere in this function where ba_mp is used should be
+ * locked.
+ */
+static void _handle_midplane_update(ComputeHardware::ConstPtr bgq,
+ ba_mp_t *ba_mp, List *delete_list)
+{
+ Midplane::ConstPtr mp_ptr = bridge_get_midplane(bgq, ba_mp);
int i;
Dimension dim;
+ char bg_down_node[128];
+
+ if (!mp_ptr) {
+ info("no midplane in the system at %s", ba_mp->coord_str);
+ return;
+ }
+
+ /* Handle this here so we don't have to lock if we don't have to. */
+ slurm_mutex_lock(&ba_system_mutex);
+ snprintf(bg_down_node, sizeof(bg_down_node), "%s%s",
+ bg_conf->slurm_node_prefix, ba_mp->coord_str);
+ slurm_mutex_unlock(&ba_system_mutex);
+
+ if (mp_ptr->getState() != Hardware::Available) {
+ _handle_bad_midplane(bg_down_node, mp_ptr->getState(), 0);
+ /* no reason to continue */
+ return;
+ } else {
+ Node::ConstPtrs vec = bridge_get_midplane_nodes(
+ mp_ptr->getLocation());
+ if (!vec.empty()) {
+ /* This, by far, is the most time consuming
+ process in the polling (especially if there
+ are changes). So lock/unlock on each one
+ so if there are other people waiting for
+ the locks they don't have to wait for all
+ this to finish.
+ */
+ BOOST_FOREACH(const Node::ConstPtr& cnode_ptr, vec) {
+ lock_slurmctld(job_read_lock);
+ slurm_mutex_lock(&block_state_mutex);
+ slurm_mutex_lock(&ba_system_mutex);
+ _handle_node_change(ba_mp,
+ cnode_ptr->getLocation(),
+ cnode_ptr->getState(),
+ delete_list, 0);
+ slurm_mutex_unlock(&ba_system_mutex);
+ slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
+ if (rt_waiting
+ || slurmctld_config.shutdown_time)
+ return;
+ }
+ }
+ }
for (i=0; i<16; i++) {
- NodeBoard::ConstPtr nodeboard = mp_ptr->getNodeBoard(i);
- if (nodeboard->getState() != Hardware::Available)
+ NodeBoard::ConstPtr nb_ptr = bridge_get_nodeboard(mp_ptr, i);
+ /* When a cnode is in error state a nodeboard is also
+ set in an error state. Since we want to track on
+ the cnode level and not the nodeboard level we can
+ use the isMetaState option that will tell me of
+ this state. If it isn't set then the nodeboard
+ itself is in an error state so proceed.
+ */
+ if (nb_ptr && !nb_ptr->isMetaState()
+ && (nb_ptr->getState() != Hardware::Available)) {
_handle_bad_nodeboard(
- nodeboard->getLocation().substr(7,3).c_str(),
- ba_mp->coord_str, nodeboard->getState());
+ nb_ptr->getLocation().substr(7,3).c_str(),
+ bg_down_node, nb_ptr->getState(), NULL, 0);
+ if (rt_waiting || slurmctld_config.shutdown_time)
+ return;
+ }
}
for (dim=Dimension::A; dim<=Dimension::D; dim++) {
- Switch::ConstPtr my_switch = mp_ptr->getSwitch(dim);
- if (my_switch->getState() != Hardware::Available)
- _handle_bad_switch(dim,
- ba_mp->coord_str,
- my_switch->getState());
+ Switch::ConstPtr switch_ptr = bridge_get_switch(mp_ptr, dim);
+ if (switch_ptr) {
+ if (switch_ptr->getState() != Hardware::Available) {
+ _handle_bad_switch(dim,
+ bg_down_node,
+ switch_ptr->getState(),
+ 1, 0);
+ if (rt_waiting
+ || slurmctld_config.shutdown_time)
+ return;
+ } else {
+ Cable::ConstPtr my_cable =
+ switch_ptr->getCable();
+ /* Dimensions of length 1 do not have a
+ cable. (duh).
+ */
+ if (my_cable) {
+ /* block_state_mutex may be
+ * needed in _handle_cable_change,
+ * so lock it first to avoid
+ * dead lock */
+ slurm_mutex_lock(&block_state_mutex);
+ slurm_mutex_lock(&ba_system_mutex);
+ _handle_cable_change(
+ dim, ba_mp,
+ my_cable->getState(),
+ delete_list, 0);
+ slurm_mutex_unlock(&ba_system_mutex);
+ slurm_mutex_unlock(&block_state_mutex);
+ if (rt_waiting
+ || slurmctld_config.shutdown_time)
+ return;
+ }
+ }
+ }
}
}
-static void _do_hardware_poll(void)
+static void _do_hardware_poll(int level, uint16_t *coords,
+ ComputeHardware::ConstPtr bgqsys)
{
-#if defined HAVE_BG_FILES
- if (!ba_main_grid)
+ ba_mp_t *ba_mp;
+ List delete_list = NULL;
+
+ if (!bgqsys) {
+ error("_do_hardware_poll: No ComputeHardware ptr");
+ return;
+ }
+
+ if (!ba_main_grid || (level > SYSTEM_DIMENSIONS))
return;
- ComputeHardware::ConstPtr bgq = getComputeHardware();
+ if (level < SYSTEM_DIMENSIONS) {
+ for (coords[level] = 0;
+ coords[level] < DIM_SIZE[level];
+ coords[level]++) {
+ /* handle the outer dims here */
+ _do_hardware_poll(level+1, coords, bgqsys);
+ if (rt_waiting || slurmctld_config.shutdown_time)
+ return;
+ }
+ return;
+ }
+ /* We are ignoring locks here to deal with speed.
+ _handle_midplane_update should handle the locks for us when
+ needed. Since the ba_mp list doesn't get destroyed until
+ the very end this should be safe.
+ */
+ if ((ba_mp = coord2ba_mp(coords)))
+ _handle_midplane_update(bgqsys, ba_mp, &delete_list);
- for (int a = 0; a < DIM_SIZE[A]; a++)
- for (int x = 0; x < DIM_SIZE[X]; x++)
- for (int y = 0; y < DIM_SIZE[Y]; y++)
- for (int z = 0; z < DIM_SIZE[Z]; z++)
- _handle_midplane_update(
- bgq, &ba_main_grid[a][x][y][z]);
-#endif
+ bg_status_process_kill_job_list(kill_job_list, 0);
+
+ if (delete_list) {
+ bool delete_it = 0;
+ if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
+ delete_it = 1;
+ free_block_list(NO_VAL, delete_list, delete_it, 0);
+ list_destroy(delete_list);
+ }
}
+
static void *_poll(void *no_data)
{
- event_handler_t event_hand;
- time_t last_ran = time(NULL);
+ static time_t last_ran = 0;
time_t curr_time;
while (bridge_status_inited) {
@@ -468,24 +991,432 @@
}
//debug("polling taking over, realtime is dead");
curr_time = time(NULL);
- _do_block_poll();
+ if (!rt_waiting && blocks_are_created)
+ _do_block_poll();
/* only do every 30 seconds */
- if ((curr_time - 30) >= last_ran)
- _do_hardware_poll();
+ if (!rt_waiting && ((curr_time - 30) >= last_ran)) {
+ uint16_t coords[SYSTEM_DIMENSIONS];
+ _do_hardware_poll(0, coords,
+ bridge_get_compute_hardware());
+ last_ran = time(NULL);
+ }
slurm_mutex_unlock(&rt_mutex);
- last_ran = time(NULL);
+ /* This means we are doing outside of the thread so
+ break */
+ if (initial_poll)
+ break;
sleep(1);
}
+
return NULL;
}
+static void *_block_action_poll(void *no_data)
+{
+ while (bridge_status_inited) {
+ //debug("polling for actions");
+ if (blocks_are_created)
+ _do_block_action_poll();
+ sleep(1);
+ }
+
+ return NULL;
+}
+
+static void *_before_rt_poll(void *no_data)
+{
+ uint16_t coords[SYSTEM_DIMENSIONS];
+ /* To make sure we don't have any missing state */
+ if (!rt_waiting && blocks_are_created)
+ _do_block_poll();
+ /* Since the RealTime server could YoYo this could be called
+ many, many times. bridge_get_compute_hardware is a heavy
+ function so to avoid it being called too many times we will
+ serialize things here.
+ */
+ slurm_mutex_lock(&get_hardware_mutex);
+ if (!rt_waiting)
+ _do_hardware_poll(0, coords, bridge_get_compute_hardware());
+ slurm_mutex_unlock(&get_hardware_mutex);
+
+ return NULL;
+}
+
+void event_handler::handleRealtimeStartedRealtimeEvent(
+ const RealtimeStartedEventInfo& event)
+{
+ if (!rt_running && !rt_waiting) {
+ pthread_attr_t thread_attr;
+ /* If we are in the middle of polling, break out since
+ we are just going to do it again right after.
+ */
+ rt_waiting = 1;
+ slurm_mutex_lock(&rt_mutex);
+ rt_waiting = 0;
+ rt_running = 1;
+ info("RealTime server started back up!");
+ /* Since we need to exit this function for the
+ realtime server to start giving us info spawn a
+ thread that will do it for us in the background.
+ */
+ slurm_attr_init(&thread_attr);
+ if (pthread_create(&before_rt_thread, &thread_attr,
+ _before_rt_poll, NULL))
+ fatal("pthread_create error %m");
+ slurm_attr_destroy(&thread_attr);
+ } else if (rt_waiting)
+ info("Realtime server appears to have gone and come back "
+ "while we were trying to bring it back");
+}
+
+void event_handler::handleRealtimeEndedRealtimeEvent(
+ const RealtimeEndedEventInfo& event)
+{
+ if (rt_running) {
+ rt_running = 0;
+ slurm_mutex_unlock(&rt_mutex);
+ info("RealTime server stopped serving info");
+ } else {
+ info("RealTime server stopped serving info before "
+ "we gave it back control.");
+ }
+}
+
+void event_handler::handleBlockStateChangedRealtimeEvent(
+ const BlockStateChangedEventInfo& event)
+{
+ bg_record_t *bg_record = NULL;
+ const char *bg_block_id = event.getBlockName().c_str();
+
+ if (!bg_lists->main)
+ return;
+
+ /* Always lock the slurmctld before locking the
+ * block_state_mutex to avoid deadlock. */
+ lock_slurmctld(job_read_lock);
+ slurm_mutex_lock(&block_state_mutex);
+ bg_record = find_bg_record_in_list(bg_lists->main, bg_block_id);
+ if (!bg_record) {
+ slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
+ debug2("bridge_status: bg_record %s isn't in the main list",
+ bg_block_id);
+ return;
+ }
+
+ bg_status_update_block_state(bg_record,
+ bridge_translate_status(event.getStatus()),
+ kill_job_list);
+
+ slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
+
+ bg_status_process_kill_job_list(kill_job_list, 0);
+
+ last_bg_update = time(NULL);
+}
+
+void event_handler::handleMidplaneStateChangedRealtimeEvent(
+ const MidplaneStateChangedEventInfo& event)
+{
+ Coordinates ibm_coords = event.getMidplaneCoordinates();
+ uint16_t coords[SYSTEM_DIMENSIONS];
+ ba_mp_t *ba_mp;
+ int dim;
+ char bg_down_node[128];
+
+ if (event.getPreviousState() == event.getState()) {
+ debug("Midplane previous state was same as current (%s - %s)",
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ //return;
+ }
+
+ for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++)
+ coords[dim] = ibm_coords[dim];
+
+ slurm_mutex_lock(&ba_system_mutex);
+ ba_mp = coord2ba_mp(coords);
+
+ if (!ba_mp) {
+ error("Midplane %s, state went from '%s' to '%s', "
+ "but is not in our system",
+ event.getLocation().c_str(),
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ slurm_mutex_unlock(&ba_system_mutex);
+ return;
+ }
+
+ if (event.getState() == Hardware::Available) {
+ /* Don't do anything, wait for admin to fix things,
+ * just note things are better. */
+
+ info("Midplane %s(%s), has returned to service",
+ event.getLocation().c_str(),
+ ba_mp->coord_str);
+ slurm_mutex_unlock(&ba_system_mutex);
+ return;
+ }
+
+ /* Else mark the midplane down */
+ snprintf(bg_down_node, sizeof(bg_down_node), "%s%s",
+ bg_conf->slurm_node_prefix, ba_mp->coord_str);
+ slurm_mutex_unlock(&ba_system_mutex);
+
+ _handle_bad_midplane(bg_down_node, event.getState(), 1);
+
+ return;
+
+}
+
+void event_handler::handleSwitchStateChangedRealtimeEvent(
+ const SwitchStateChangedEventInfo& event)
+{
+ Coordinates ibm_coords = event.getMidplaneCoordinates();
+ uint16_t coords[SYSTEM_DIMENSIONS];
+ int dim;
+ ba_mp_t *ba_mp;
+ char bg_down_node[128];
+
+
+ if (event.getPreviousState() == event.getState()) {
+ debug("Switch previous state was same as current (%s - %s)",
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ //return;
+ }
+
+ for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++)
+ coords[dim] = ibm_coords[dim];
+
+ dim = event.getDimension();
+ slurm_mutex_lock(&ba_system_mutex);
+ ba_mp = coord2ba_mp(coords);
+
+ if (!ba_mp) {
+ error("Switch in dim '%d' on Midplane %s, state "
+ "went from '%s' to '%s', but is not in our system",
+ dim, event.getMidplaneLocation().c_str(),
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ slurm_mutex_unlock(&ba_system_mutex);
+ return;
+ }
+
+ if (event.getState() == Hardware::Available) {
+ /* Don't do anything, wait for admin to fix things,
+ * just note things are better. */
+
+ info("Switch in dim '%u' on Midplane %s(%s), "
+ "has returned to service",
+ dim, event.getMidplaneLocation().c_str(),
+ ba_mp->coord_str);
+ slurm_mutex_unlock(&ba_system_mutex);
+ return;
+ }
+
+ snprintf(bg_down_node, sizeof(bg_down_node), "%s%s",
+ bg_conf->slurm_node_prefix, ba_mp->coord_str);
+ slurm_mutex_unlock(&ba_system_mutex);
+
+ /* Else mark the midplane down */
+ _handle_bad_switch(dim, bg_down_node, event.getState(), 0, 1);
+
+ return;
+}
+
+void event_handler::handleNodeBoardStateChangedRealtimeEvent(
+ const NodeBoardStateChangedEventInfo& event)
+{
+ const char *mp_name;
+ const char *nb_name;
+ Coordinates ibm_coords = event.getMidplaneCoordinates();
+ uint16_t coords[SYSTEM_DIMENSIONS];
+ int dim;
+ ba_mp_t *ba_mp;
+ char bg_down_node[128];
+
+ if (event.getPreviousState() == event.getState()) {
+ debug("Nodeboard previous state was same as current (%s - %s)",
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ //return;
+ }
+
+ /* When dealing with non-pointers these variables don't work
+ out correctly, so copy them.
+ */
+ mp_name = xstrdup(event.getLocation().substr(0,6).c_str());
+ nb_name = xstrdup(event.getLocation().substr(7,3).c_str());
+
+ for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++)
+ coords[dim] = ibm_coords[dim];
+
+ slurm_mutex_lock(&ba_system_mutex);
+ ba_mp = coord2ba_mp(coords);
+
+ if (!ba_mp) {
+ error("Nodeboard '%s' on Midplane %s (%s), state went from "
+ "'%s' to '%s', but is not in our system",
+ nb_name, mp_name, event.getLocation().c_str(),
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ xfree(nb_name);
+ xfree(mp_name);
+ slurm_mutex_unlock(&ba_system_mutex);
+ return;
+ }
+
+ if (event.getState() == Hardware::Available) {
+ /* Don't do anything, wait for admin to fix things,
+ * just note things are better. */
+
+ info("Nodeboard '%s' on Midplane %s(%s), "
+ "has returned to service",
+ nb_name, mp_name,
+ ba_mp->coord_str);
+ xfree(nb_name);
+ xfree(mp_name);
+ slurm_mutex_unlock(&ba_system_mutex);
+ return;
+ }
+
+ snprintf(bg_down_node, sizeof(bg_down_node), "%s%s",
+ bg_conf->slurm_node_prefix, ba_mp->coord_str);
+ slurm_mutex_unlock(&ba_system_mutex);
+
+ _handle_bad_nodeboard(nb_name, bg_down_node, event.getState(), NULL, 1);
+ xfree(nb_name);
+ xfree(mp_name);
+
+ return;
+}
+
+void event_handler::handleNodeStateChangedRealtimeEvent(
+ const NodeStateChangedEventInfo& event)
+{
+ Coordinates ibm_coords = event.getMidplaneCoordinates();
+ uint16_t coords[SYSTEM_DIMENSIONS];
+ int dim;
+ ba_mp_t *ba_mp;
+ List delete_list = NULL;
+
+ if (event.getPreviousState() == event.getState()) {
+ debug("Node previous state was same as current (%s - %s)",
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ //return;
+ }
+
+ for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++)
+ coords[dim] = ibm_coords[dim];
+
+ /* job_read_lock and block_state_mutex may be needed in
+ * _handle_node_change, so lock it first to avoid dead lock */
+ lock_slurmctld(job_read_lock);
+ slurm_mutex_lock(&block_state_mutex);
+ slurm_mutex_lock(&ba_system_mutex);
+ ba_mp = coord2ba_mp(coords);
+
+ if (!ba_mp) {
+ error("Node '%s' on Midplane %s, state went from '%s' to '%s',"
+ "but is not in our system",
+ event.getLocation().c_str(),
+ event.getLocation().substr(0,6).c_str(),
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ slurm_mutex_unlock(&ba_system_mutex);
+ slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
+ return;
+ }
+
+ info("Node '%s' on Midplane %s, state went from '%s' to '%s'",
+ event.getLocation().c_str(), ba_mp->coord_str,
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+
+ _handle_node_change(ba_mp, event.getLocation(), event.getState(),
+ &delete_list, 1);
+ slurm_mutex_unlock(&ba_system_mutex);
+ slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
+
+ bg_status_process_kill_job_list(kill_job_list, 0);
+
+ if (delete_list) {
+ /* The only reason blocks are added to this list is if
+ there are missing cnodes on the block so remove
+ them from the mix.
+ */
+ free_block_list(NO_VAL, delete_list, 1, 0);
+ list_destroy(delete_list);
+ }
+
+ return;
+}
+
+void event_handler::handleTorusCableStateChangedRealtimeEvent(
+ const TorusCableStateChangedEventInfo& event)
+{
+ Coordinates ibm_coords = event.getFromMidplaneCoordinates();
+ uint16_t coords[SYSTEM_DIMENSIONS];
+ int dim;
+ ba_mp_t *from_ba_mp;
+ List delete_list = NULL;
+
+ if (event.getPreviousState() == event.getState()) {
+ debug("Cable previous state was same as current (%s - %s)",
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ //return;
+ }
+
+ for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++)
+ coords[dim] = ibm_coords[dim];
+
+ dim = event.getDimension();
+
+ /* block_state_mutex may be needed in _handle_cable_change,
+ * so lock it first to avoid dead lock */
+ slurm_mutex_lock(&block_state_mutex);
+ slurm_mutex_lock(&ba_system_mutex);
+ from_ba_mp = coord2ba_mp(coords);
+ if (!from_ba_mp) {
+ error("Cable in dim '%d' on Midplane %s, state "
+ "went from '%s' to '%s', but is not in our system",
+ dim, event.getFromMidplaneLocation().c_str(),
+ bridge_hardware_state_string(event.getPreviousState()),
+ bridge_hardware_state_string(event.getState()));
+ slurm_mutex_unlock(&ba_system_mutex);
+ slurm_mutex_unlock(&block_state_mutex);
+ return;
+ }
+
+ /* Else mark the cable down */
+ _handle_cable_change(dim, from_ba_mp, event.getState(),
+ &delete_list, 1);
+ slurm_mutex_unlock(&ba_system_mutex);
+ slurm_mutex_unlock(&block_state_mutex);
+
+ if (delete_list) {
+ bool delete_it = 0;
+ if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
+ delete_it = 1;
+ free_block_list(NO_VAL, delete_list, delete_it, 0);
+ list_destroy(delete_list);
+ }
+ return;
+}
+
#endif
extern int bridge_status_init(void)
{
if (bridge_status_inited)
- return SLURM_ERROR;
+ return SLURM_SUCCESS;
bridge_status_inited = true;
@@ -495,6 +1426,10 @@
if (!kill_job_list)
kill_job_list = bg_status_create_kill_job_list();
+ /* get initial state */
+ _poll(NULL);
+ initial_poll = false;
+
rt_client_ptr = new(bgsched::realtime::Client);
slurm_attr_init(&thread_attr);
@@ -503,6 +1438,10 @@
slurm_attr_init(&thread_attr);
if (pthread_create(&poll_thread, &thread_attr, _poll, NULL))
fatal("pthread_create error %m");
+ slurm_attr_init(&thread_attr);
+ if (pthread_create(&action_poll_thread, &thread_attr,
+ _block_action_poll, NULL))
+ fatal("pthread_create error %m");
slurm_attr_destroy(&thread_attr);
#endif
return SLURM_SUCCESS;
@@ -514,13 +1453,15 @@
return SLURM_ERROR;
bridge_status_inited = false;
-#if defined HAVE_BG_FILES
- /* make the rt connection end. */
- rt_client_ptr->disconnect();
- if (kill_job_list) {
- list_destroy(kill_job_list);
- kill_job_list = NULL;
+#if defined HAVE_BG_FILES
+ rt_waiting = 1;
+ /* make the rt connection end. */
+ _bridge_status_disconnect();
+
+ if (before_rt_thread) {
+ pthread_join(before_rt_thread, NULL);
+ before_rt_thread = 0;
}
if (real_time_thread) {
@@ -532,12 +1473,82 @@
pthread_join(poll_thread, NULL);
poll_thread = 0;
}
+
+ if (action_poll_thread) {
+ pthread_join(action_poll_thread, NULL);
+ action_poll_thread = 0;
+ }
+
+ if (kill_job_list) {
+ list_destroy(kill_job_list);
+ kill_job_list = NULL;
+ }
+
pthread_mutex_destroy(&rt_mutex);
+ pthread_mutex_destroy(&get_hardware_mutex);
+
delete(rt_client_ptr);
#endif
return SLURM_SUCCESS;
}
+/* This needs to have block_state_mutex locked before hand. */
+extern int bridge_status_update_block_list_state(List block_list)
+{
+ int updated = 0;
+#if defined HAVE_BG_FILES
+ uint16_t real_state, state;
+ bg_record_t *bg_record = NULL;
+ ListIterator itr = NULL;
+
+ itr = list_iterator_create(block_list);
+ while ((bg_record = (bg_record_t *) list_next(itr))) {
+ BlockFilter filter;
+ Block::Ptrs vec;
+ if (!bridge_status_inited)
+ break;
+ else if (bg_record->magic != BLOCK_MAGIC) {
+ /* block is gone */
+ list_remove(itr);
+ continue;
+ } else if (!bg_record->bg_block_id)
+ continue;
+
+ filter.setName(string(bg_record->bg_block_id));
+
+ vec = bridge_get_blocks(filter);
+ if (vec.empty()) {
+ debug("bridge_status_update_block_list_state: "
+ "block %s not found, removing from slurm",
+ bg_record->bg_block_id);
+ /* block is gone? */
+ list_remove(itr);
+ continue;
+ }
+ const Block::Ptr &block_ptr = *(vec.begin());
+
+ real_state = bg_record->state & (~BG_BLOCK_ERROR_FLAG);
+ state = bridge_translate_status(
+ block_ptr->getStatus().toValue());
+
+ if (real_state != state) {
+ if (bg_record->state & BG_BLOCK_ERROR_FLAG)
+ state |= BG_BLOCK_ERROR_FLAG;
+
+ debug("freeing state of Block %s was %s and now is %s",
+ bg_record->bg_block_id,
+ bg_block_state_string(bg_record->state),
+ bg_block_state_string(state));
+
+ bg_record->state = state;
+ updated = 1;
+ }
+ }
+ list_iterator_destroy(itr);
+#endif
+ return updated;
+}
+
/*
* This could potentially lock the node lock in the slurmctld with
* slurm_drain_node, so if slurmctld_locked is called we will call the
diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_status.h b/src/plugins/select/bluegene/bl_bgq/bridge_status.h
index dcc8c5f..326d3c2 100644
--- a/src/plugins/select/bluegene/bl_bgq/bridge_status.h
+++ b/src/plugins/select/bluegene/bl_bgq/bridge_status.h
@@ -41,7 +41,6 @@
#include "bridge_helper.h"
-extern int bridge_status_init(void);
extern int bridge_status_fini(void);
#endif
diff --git a/src/plugins/select/bluegene/bridge_linker.h b/src/plugins/select/bluegene/bridge_linker.h
index eda7cdc..b8fa501 100644
--- a/src/plugins/select/bluegene/bridge_linker.h
+++ b/src/plugins/select/bluegene/bridge_linker.h
@@ -65,6 +65,7 @@
#include "src/common/bitstring.h"
#include "src/common/xstring.h"
#include "src/common/xmalloc.h"
+#include "src/slurmctld/locks.h"
#include "bg_list_functions.h"
#include "bg_enums.h"
@@ -76,9 +77,10 @@
extern bg_lists_t *bg_lists;
extern time_t last_bg_update;
extern pthread_mutex_t block_state_mutex;
-extern pthread_mutex_t request_list_mutex;
extern int blocks_are_created;
extern int num_unused_cpus;
+extern int num_possible_unused_cpus;
+extern slurmctld_lock_t job_read_lock;
extern int bridge_init(char *properties_file);
extern int bridge_fini();
@@ -98,15 +100,20 @@
extern int bridge_block_free(bg_record_t *bg_record);
extern int bridge_block_remove(bg_record_t *bg_record);
-extern int bridge_block_add_user(bg_record_t *bg_record, char *user_name);
-extern int bridge_block_remove_user(bg_record_t *bg_record, char *user_name);
-extern int bridge_block_remove_all_users(bg_record_t *bg_record,
- char *user_name);
+extern int bridge_block_add_user(bg_record_t *bg_record,
+ const char *user_name);
+extern int bridge_block_remove_user(bg_record_t *bg_record,
+ const char *user_name);
+extern int bridge_block_sync_users(bg_record_t *bg_record);
extern int bridge_blocks_load_curr(List curr_block_list);
extern void bridge_reset_block_list(List block_list);
-extern void bridge_block_post_job(char *bg_block_id);
+extern void bridge_block_post_job(char *bg_block_id,
+ struct job_record *job_ptr);
+extern uint16_t bridge_block_get_action(char *bg_block_id);
+extern int bridge_check_nodeboards(char *mp_loc);
+
extern int bridge_set_log_params(char *api_file_name, unsigned int level);
#if defined HAVE_BG_FILES && defined HAVE_BG_L_P
diff --git a/src/plugins/select/bluegene/runjob_plugin.cc b/src/plugins/select/bluegene/runjob_plugin.cc
index 49c23ca..dda34df 100644
--- a/src/plugins/select/bluegene/runjob_plugin.cc
+++ b/src/plugins/select/bluegene/runjob_plugin.cc
@@ -7,8 +7,9 @@
*
*****************************************************************************
* Copyright (C) 2011 Lawrence Livermore National Security.
+ * Copyright (C) 2011 SchedMD LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Danny Auble <auble1@llnl.gov> et. al.
+ * Written by Danny Auble <da@schedmd.com> et. al.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.schedmd.com/slurmdocs/>.
@@ -45,6 +46,9 @@
# include "config.h"
#endif
#include "src/common/xmalloc.h"
+#include "src/common/list.h"
+#include "src/common/hostlist.h"
+#include "src/common/slurm_protocol_defs.h"
#include <slurm/slurm.h>
}
@@ -57,6 +61,7 @@
#include <boost/thread/mutex.hpp>
#include <boost/foreach.hpp>
+#include <boost/lexical_cast.hpp>
#include <iosfwd>
@@ -85,18 +90,65 @@
boost::mutex _mutex;
};
+typedef struct {
+ char *bg_block_id;
+ pid_t pid; /* The only way we can track things
+ since we don't have a jobid from
+ mmcs in the verify state.
+ */
+ uint32_t job_id;
+ uint32_t step_id;
+ char *total_cnodes;
+} runjob_job_t;
+
+static List runjob_list = NULL;
+static pthread_mutex_t runjob_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void _destroy_runjob_job(void *object)
+{
+ runjob_job_t *runjob_job = (runjob_job_t *)object;
+ if (runjob_job) {
+ xfree(runjob_job->bg_block_id);
+ xfree(runjob_job->total_cnodes);
+ xfree(runjob_job);
+ }
+}
+
+static void _send_failed_cnodes(uint32_t job_id, uint32_t step_id, uint16_t sig)
+{
+ int rc;
+
+ while ((rc = slurm_kill_job_step(job_id, step_id, sig))) {
+ rc = slurm_get_errno();
+
+ if (rc == ESLURM_ALREADY_DONE || rc == ESLURM_INVALID_JOB_ID)
+ break;
+ std::cerr << "Trying to fail cnodes, message from slurmctld: "
+ << slurm_strerror(rc) << std::endl;
+ sleep (5);
+ }
+}
+
+
Plugin::Plugin() :
bgsched::runjob::Plugin(),
_mutex()
{
assert(HIGHEST_DIMENSIONS >= Dimension::NodeDims);
- std::cout << "Slurm runjob plugin loaded" << std::endl;
+ runjob_list = list_create(_destroy_runjob_job);
+
+ std::cout << "Slurm runjob plugin loaded version "
+ << SLURM_VERSION_STRING << std::endl;
}
Plugin::~Plugin()
{
std::cout << "Slurm runjob plugin finished" << std::endl;
+ slurm_mutex_lock(&runjob_list_lock);
+ list_destroy(runjob_list);
+ runjob_list = NULL;
+ slurm_mutex_unlock(&runjob_list_lock);
}
void Plugin::execute(bgsched::runjob::Verify& verify)
@@ -111,22 +163,27 @@
bool sub_block_job = 0;
job_step_info_response_msg_t * step_resp = NULL;
job_step_info_t *step_ptr = NULL;
- uint32_t job_id = NO_VAL, step_id = NO_VAL;
- char *bg_block_id = NULL;
+ runjob_job_t *runjob_job = NULL;
+ char tmp_char[16];
+ std::string message = "Unknown failure";
geo[0] = NO_VAL;
start_coords[0] = NO_VAL;
+ runjob_job = (runjob_job_t *)xmalloc(sizeof(runjob_job_t));
+ runjob_job->job_id = NO_VAL;
+ runjob_job->step_id = NO_VAL;
+
/* Get the job/step id's from the environment and then go
* verify with the slurmctld where this step should be running.
*/
BOOST_FOREACH(const bgsched::runjob::Environment& env_var,
verify.envs()) {
if (env_var.getKey() == "SLURM_JOB_ID") {
- job_id = atoi(env_var.getValue().c_str());
+ runjob_job->job_id = atoi(env_var.getValue().c_str());
found++;
} else if (env_var.getKey() == "SLURM_STEP_ID") {
- step_id = atoi(env_var.getValue().c_str());
+ runjob_job->step_id = atoi(env_var.getValue().c_str());
found++;
}
@@ -134,18 +191,25 @@
break;
}
- if (found != looking_for)
+ if (found != looking_for) {
+ message = "Couldn't find ENV VARS SLURM_JOB_ID and "
+ "SLURM_STEP_ID. Are you out of SLURM? "
+ "Use srun, not runjob.";
goto deny_job;
+ }
- if (slurm_get_job_steps((time_t) 0, job_id, step_id,
+ if (slurm_get_job_steps((time_t) 0, runjob_job->job_id,
+ runjob_job->step_id,
&step_resp, SHOW_ALL)) {
- slurm_perror((char *)"slurm_get_job_steps error");
+ message = "slurm_get_job_steps error";
goto deny_job;
}
if (!step_resp->job_step_count) {
- std::cerr << "No steps match this id "
- << job_id << "." << step_id << std::endl;
+ message = "No steps match this id "
+ + boost::lexical_cast<std::string>(runjob_job->job_id)
+ + "."
+ + boost::lexical_cast<std::string>(runjob_job->step_id);
goto deny_job;
}
@@ -155,43 +219,55 @@
supposed to be running.
*/
if (verify.user().uid() != step_ptr->user_id) {
- std::cerr << "Jobstep " << job_id << "." << step_id
- << " should be ran by uid " << step_ptr->user_id
- << " but it is trying to be ran by "
- << verify.user().uid() << std::endl;
+ message = "Jobstep "
+ + boost::lexical_cast<std::string>(runjob_job->job_id)
+ + "."
+ + boost::lexical_cast<std::string>(runjob_job->step_id)
+ + " should be ran by uid "
+ + boost::lexical_cast<std::string>(step_ptr->user_id)
+ + " but it is trying to be ran by "
+ + boost::lexical_cast<std::string>(verify.user().uid());
goto deny_job;
}
if (slurm_get_select_jobinfo(step_ptr->select_jobinfo,
SELECT_JOBDATA_BLOCK_ID,
- &bg_block_id)) {
- std::cerr << "Can't get the block id!" << std::endl;
+ &runjob_job->bg_block_id)) {
+ message = "Can't get the block id!";
goto deny_job;
}
- verify.block(bg_block_id);
- xfree(bg_block_id);
+ verify.block(runjob_job->bg_block_id);
+
+ if (slurm_get_select_jobinfo(step_ptr->select_jobinfo,
+ SELECT_JOBDATA_IONODES,
+ &runjob_job->total_cnodes)) {
+ message = "Can't get the cnode string!";
+ goto deny_job;
+ }
if (slurm_get_select_jobinfo(step_ptr->select_jobinfo,
SELECT_JOBDATA_BLOCK_NODE_CNT,
&block_cnode_cnt)) {
- std::cerr << "Can't get the block node count!" << std::endl;
+ message = "Can't get the block node count!";
goto deny_job;
}
if (slurm_get_select_jobinfo(step_ptr->select_jobinfo,
SELECT_JOBDATA_NODE_CNT,
&step_cnode_cnt)) {
- std::cerr << "Can't get the step node count!" << std::endl;
+ message = "Can't get the step node count!";
goto deny_job;
}
if (!step_cnode_cnt || !block_cnode_cnt) {
- std::cerr << "We didn't get both the step cnode "
- << "count and the block cnode cnt! step="
- << step_cnode_cnt << " block="
- << block_cnode_cnt << std::endl;
+ message = "We didn't get both the step cnode "
+ "count and the block cnode cnt! step="
+ + boost::lexical_cast<std::string>(step_cnode_cnt)
+ + " block="
+ + boost::lexical_cast<std::string>(block_cnode_cnt);
goto deny_job;
- } else if (step_cnode_cnt < block_cnode_cnt) {
+ } else if ((step_cnode_cnt < block_cnode_cnt)
+ && (step_cnode_cnt <= 512)) {
uint16_t dim;
uint16_t tmp_uint16[HIGHEST_DIMENSIONS];
@@ -199,17 +275,19 @@
if (slurm_get_select_jobinfo(step_ptr->select_jobinfo,
SELECT_JOBDATA_GEOMETRY,
&tmp_uint16)) {
- std::cerr << "Can't figure out the geo "
- << "given for sub-block job!" << std::endl;
+ message = "Can't figure out the geo "
+ "given for sub-block job!";
goto deny_job;
}
/* since geo is an unsigned (who really knows what
that is depending on the arch) we need to convert
our uint16_t to the unsigned array
*/
- for (dim=0; dim<Dimension::NodeDims; dim++)
+ for (dim=0; dim<Dimension::NodeDims; dim++) {
+ if (tmp_uint16[dim] == (uint16_t)NO_VAL)
+ break;
geo[dim] = tmp_uint16[dim];
-
+ }
/* Since IBM's stuff relies on a relative location we
have stored this information in the conn_type of
the select_jobinfo structure. If you want the
@@ -219,33 +297,43 @@
if (slurm_get_select_jobinfo(step_ptr->select_jobinfo,
SELECT_JOBDATA_CONN_TYPE,
&tmp_uint16)) {
- std::cerr << "Can't figure out the start loc "
- << "for sub-block job!" << std::endl;
+ message = "Can't figure out the start loc "
+ "for sub-block job!";
goto deny_job;
}
- for (dim=0; dim<Dimension::NodeDims; dim++)
+ for (dim=0; dim<Dimension::NodeDims; dim++) {
+ if (tmp_uint16[dim] == (uint16_t)NO_VAL)
+ break;
start_coords[dim] = tmp_uint16[dim];
+ }
}
if (sub_block_job && start_coords[0] != NO_VAL)
verify.corner(bgsched::runjob::Corner(start_coords));
else if (sub_block_job) {
- std::cerr << "No corner given for sub-block job!" << std::endl;
+ message = "No corner given for sub-block job!";
goto deny_job;
}
if (sub_block_job && geo[0] != NO_VAL)
verify.shape(bgsched::runjob::Shape(geo));
else if (sub_block_job) {
- std::cerr << "No shape given for sub-block job!" << std::endl;
+ message = "No shape given for sub-block job!";
goto deny_job;
}
if (verify.block().empty() || (verify.block().length() < 3)) {
- std::cerr << "YOU ARE OUTSIDE OF SLURM!!!!" << std::endl;
+ message = "YOU ARE OUTSIDE OF SLURM!!!!";
goto deny_job;
}
+
+ /* set the scheduler_data to be the job id so we can filter on
+ it when we go to clean up the job in the slurmctld.
+ */
+ snprintf(tmp_char, sizeof(tmp_char), "%u", runjob_job->job_id);
+ verify.scheduler_data(tmp_char);
+
// std::cout << "executable: " << verify.exe() << std::endl;
// std::cout << "args : ";
// std::copy(verify.args().begin(), verify.args().end(),
@@ -268,42 +356,118 @@
// const ProcessTree tree( verify.pid() );
// std::cout << tree << std::endl;
+ runjob_job->pid = verify.pid();
+
+ slurm_mutex_lock(&runjob_list_lock);
+ if (runjob_list)
+ list_append(runjob_list, runjob_job);
+ slurm_mutex_unlock(&runjob_list_lock);
+
slurm_free_job_step_info_response_msg(step_resp);
return;
deny_job:
+ _destroy_runjob_job(runjob_job);
slurm_free_job_step_info_response_msg(step_resp);
- verify.deny_job(bgsched::runjob::Verify::DenyJob::Yes);
+ verify.deny_job(message);
return;
}
void Plugin::execute(const bgsched::runjob::Started& data)
{
boost::lock_guard<boost::mutex> lock( _mutex );
- // std::cout << "runjob " << data.pid()
- // << " started with ID " << data.job() << std::endl;
+ // ListIterator itr = NULL;
+ // runjob_job_t *runjob_job = NULL;
+
+ // slurm_mutex_lock(&runjob_list_lock);
+ // if (runjob_list) {
+ // itr = list_iterator_create(runjob_list);
+ // while ((runjob_job = (runjob_job_t *)list_next(itr))) {
+ // if (runjob_job->pid == data.pid()) {
+ // std::cout << "Slurm step " << runjob_job->job_id
+ // << "." << runjob_job->step_id
+ // << " is IBM ID " << data.job()
+ // << std::endl;
+ // break;
+ // }
+ // }
+ // list_iterator_destroy(itr);
+ // }
+ // slurm_mutex_unlock(&runjob_list_lock);
}
void Plugin::execute(const bgsched::runjob::Terminated& data)
{
+ ListIterator itr = NULL;
+ runjob_job_t *runjob_job = NULL;
+ uint16_t sig = 0;
+
boost::lock_guard<boost::mutex> lock( _mutex );
- // std::cout << "runjob " << data.pid() << " shadowing job "
- // << data.job() << " finished with status "
- // << data.status() << std::endl;
// output failed nodes
const bgsched::runjob::Terminated::Nodes& nodes =
data.software_error_nodes();
- if (!nodes.empty()) {
- /* FIXME: We sould tell the slurmctld about this
- instead of just printing it out.
- */
- std::cerr << nodes.size() << " failed nodes" << std::endl;
- BOOST_FOREACH(const bgsched::runjob::Node& i, nodes) {
- std::cerr << i.location() << ": "
- << i.coordinates() << std::endl;
+
+ slurm_mutex_lock(&runjob_list_lock);
+ if (runjob_list) {
+ itr = list_iterator_create(runjob_list);
+ while ((runjob_job = (runjob_job_t *)list_next(itr))) {
+ if (runjob_job->pid == data.pid()) {
+ // std::cout << "Slurm step " << runjob_job->job_id
+ // << "." << runjob_job->step_id
+ // << ", IBM ID " << data.job()
+ // << " finished with status "
+ // << data.status() << std::endl;
+ list_remove(itr);
+ break;
+ }
}
+ list_iterator_destroy(itr);
}
+ slurm_mutex_unlock(&runjob_list_lock);
+
+ if (!runjob_job) {
+ if (runjob_list)
+ std::cerr << "Couldn't find job running with pid, "
+ << data.pid() << " ID " << data.job()
+ << std::endl;
+ } else if (data.kill_timeout()) {
+ std::cerr << runjob_job->job_id << "." << runjob_job->step_id
+ << " had a kill_timeout()" << std::endl;
+ /* In an older driver this wasn't always caught, so
+ send it.
+ */
+ sig = SIG_NODE_FAIL;
+ } else if (!nodes.empty()) {
+ char tmp_char[6];
+
+ std::cerr << runjob_job->job_id << "." << runjob_job->step_id
+ << " had " << nodes.size() << " nodes fail"
+ << std::endl;
+ BOOST_FOREACH(const bgsched::runjob::Node& i, nodes) {
+ sprintf(tmp_char, "%u%u%u%u%u",
+ i.coordinates().a(),
+ i.coordinates().b(),
+ i.coordinates().c(),
+ i.coordinates().d(),
+ i.coordinates().e());
+ std::cerr << i.location() << ": "
+ << i.coordinates()
+ << tmp_char << std::endl;
+ }
+ } else if (!data.message().empty()) {
+ std::cerr << runjob_job->job_id << "." << runjob_job->step_id
+ << " had a message of '" << data.message()
+ << "'. ("
+ << runjob_job->total_cnodes << ")" << std::endl;
+ } // else if (data.status() == 9)
+ // sig = SIGKILL;
+
+ if (sig)
+ _send_failed_cnodes(
+ runjob_job->job_id, runjob_job->step_id, sig);
+
+ _destroy_runjob_job(runjob_job);
}
extern "C" bgsched::runjob::Plugin* create()
diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c
index 12a70de..d59c0c8 100644
--- a/src/plugins/select/bluegene/select_bluegene.c
+++ b/src/plugins/select/bluegene/select_bluegene.c
@@ -38,18 +38,16 @@
\*****************************************************************************/
#include "src/common/slurm_xlator.h"
-#include "src/common/uid.h"
#include "bg_core.h"
#include "bg_read_config.h"
#include "bg_defined_block.h"
-#ifdef HAVE_BGQ
+#ifndef HAVE_BG_L_P
# include "ba_bgq/block_allocator.h"
#else
# include "ba/block_allocator.h"
#endif
-//#include "src/common/uid.h"
#include "src/slurmctld/trigger_mgr.h"
#include <fcntl.h>
@@ -60,6 +58,7 @@
* overwritten when linking with the slurmctld.
*/
#if defined (__APPLE__)
+slurmctld_config_t slurmctld_config __attribute__((weak_import));
slurm_ctl_conf_t slurmctld_conf __attribute__((weak_import));
struct node_record *node_record_table_ptr __attribute__((weak_import)) = NULL;
int bg_recover __attribute__((weak_import)) = NOT_FROM_CONTROLLER;
@@ -73,6 +72,7 @@
char *slurmctld_cluster_name __attribute__((weak_import)) = NULL;
slurmdb_cluster_rec_t *working_cluster_rec __attribute__((weak_import)) = NULL;
#else
+slurmctld_config_t slurmctld_config;
slurm_ctl_conf_t slurmctld_conf;
struct node_record *node_record_table_ptr = NULL;
int bg_recover = NOT_FROM_CONTROLLER;
@@ -126,6 +126,9 @@
pthread_mutex_t block_state_mutex = PTHREAD_MUTEX_INITIALIZER;
int blocks_are_created = 0;
int num_unused_cpus = 0;
+int num_possible_unused_cpus = 0;
+slurmctld_lock_t job_read_lock = {
+ NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data);
@@ -301,7 +304,9 @@
init_record->job_running =
NO_JOB_RUNNING;
init_record->job_ptr = NULL;
- }
+ } else if (init_record->job_list &&
+ list_count(init_record->job_list))
+ list_flush(init_record->job_list);
list_push(destroy_list, init_record);
}
}
@@ -340,12 +345,10 @@
static bg_record_t *_translate_info_2_record(block_info_t *block_info)
{
- uid_t my_uid;
bg_record_t *bg_record = NULL;
- bitstr_t *mp_bitmap = NULL, *ionode_bitmap = NULL, *used_bitmap = NULL;
+ bitstr_t *mp_bitmap = NULL, *ionode_bitmap = NULL;
mp_bitmap = bit_alloc(node_record_count);
- used_bitmap = bit_alloc(node_record_count);
ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
if (block_info->mp_inx
@@ -353,11 +356,6 @@
error("Job state recovered incompatible with "
"bluegene.conf. mp=%u",
node_record_count);
- if (block_info->mp_used_inx
- && inx2bitstr(used_bitmap, block_info->mp_used_inx) == -1)
- error("Job state recovered incompatible with "
- "bluegene.conf. used=%u",
- node_record_count);
if (block_info->ionode_inx
&& inx2bitstr(ionode_bitmap, block_info->ionode_inx) == -1)
error("Job state recovered incompatible with "
@@ -370,12 +368,20 @@
block_info->bg_block_id = NULL;
bg_record->mp_str = block_info->mp_str;
block_info->mp_str = NULL;
- bg_record->ionode_str = block_info->ionode_str;
- block_info->ionode_str = NULL;
bg_record->ionode_bitmap = ionode_bitmap;
ionode_bitmap = NULL;
- bg_record->mp_used_bitmap = used_bitmap;
- used_bitmap = NULL;
+
+ if (block_info->ionode_str) {
+ ba_set_ionode_str(bg_record);
+ if (!bg_record->ionode_str
+ || strcmp(block_info->ionode_str, bg_record->ionode_str)) {
+ error("block %s didn't compute with the correct "
+ "ionode_str. Stored as '%s' and "
+ "came back as '%s'",
+ bg_record->bg_block_id,
+ block_info->ionode_str, bg_record->ionode_str);
+ }
+ }
bg_record->mp_bitmap = mp_bitmap;
mp_bitmap = NULL;
@@ -388,31 +394,22 @@
*/
bg_record->state = block_info->state;
- bg_record->job_running = block_info->job_running;
- if (bg_record->job_running > NO_JOB_RUNNING)
- bg_record->job_ptr = find_job_record(bg_record->job_running);
- bg_record->job_list = block_info->job_list;
- block_info->job_list = NULL;
-
bg_record->cnode_cnt = block_info->cnode_cnt;
bg_record->mp_count = bit_set_count(bg_record->mp_bitmap);
+ /* Don't copy the job_list from the block_info, we will fill
+ it in later in the job sync.
+ */
+ bg_record->job_running = NO_JOB_RUNNING;
+ if (bg_conf->sub_blocks && (bg_record->mp_count == 1))
+ bg_record->job_list = list_create(NULL);
+
#ifdef HAVE_BGL
bg_record->node_use = block_info->node_use;
#endif
memcpy(bg_record->conn_type, block_info->conn_type,
sizeof(bg_record->conn_type));
- bg_record->target_name = xstrdup(bg_conf->slurm_user_name);
- bg_record->user_name = xstrdup(bg_conf->slurm_user_name);
-
- if (uid_from_string(bg_record->user_name, &my_uid) < 0) {
- error("uid_from_strin(%s): %m",
- bg_record->user_name);
- } else {
- bg_record->user_uid = my_uid;
- }
-
bg_record->blrtsimage = block_info->blrtsimage;
block_info->blrtsimage = NULL;
bg_record->linuximage = block_info->linuximage;
@@ -429,6 +426,26 @@
return bg_record;
}
+static void _local_pack_block_job_info(struct job_record *job_ptr, Buf buffer,
+ uint16_t protocol_version)
+{
+ block_job_info_t block_job;
+ select_jobinfo_t *jobinfo = job_ptr->select_jobinfo->data;
+
+ memset(&block_job, 0, sizeof(block_job_info_t));
+ block_job.job_id = job_ptr->job_id;
+ block_job.user_id = job_ptr->user_id;
+ if (jobinfo) {
+ block_job.user_name = jobinfo->user_name;
+ block_job.cnodes = jobinfo->ionode_str;
+ } else
+ error("NO JOBINFO for job %u magic %u!!!!!!!!!!!!!!",
+ job_ptr->job_id, job_ptr->magic);
+
+ /* block_job.cnode_inx -- try not to set */
+ slurm_pack_block_job_info(&block_job, buffer, protocol_version);
+}
+
/* Pack all relevent information about a block */
/* NOTE: There is a matching pack function in
* common/slurm_protocol_pack.c dealing with the block_info_t
@@ -441,11 +458,67 @@
#ifdef HAVE_BGQ
int dim;
#endif
- uint32_t count = NO_VAL;
- block_job_info_t *job;
+ uint32_t count = NO_VAL, running_job = 0;
+ struct job_record *job_ptr;
ListIterator itr;
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ packstr(bg_record->bg_block_id, buffer);
+ packstr(bg_record->blrtsimage, buffer);
+ pack_bit_fmt(bg_record->mp_bitmap, buffer);
+#ifdef HAVE_BGQ
+ pack32(SYSTEM_DIMENSIONS, buffer);
+ for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
+ pack16(bg_record->conn_type[dim], buffer);
+#else
+ pack32(1, buffer); /* for dimensions of conn_type */
+ pack16(bg_record->conn_type[0], buffer);
+#endif
+ packstr(bg_record->ionode_str, buffer);
+ pack_bit_fmt(bg_record->ionode_bitmap, buffer);
+
+ if (bg_record->job_list)
+ count = list_count(bg_record->job_list);
+
+ if (count && count != NO_VAL) {
+ pack32(count, buffer);
+ itr = list_iterator_create(bg_record->job_list);
+ while ((job_ptr = list_next(itr))) {
+ if (job_ptr->magic != JOB_MAGIC) {
+ error("_pack_block: "
+ "bad magic found when "
+ "packing block %s",
+ bg_record->bg_block_id);
+ list_delete_item(itr);
+ slurm_pack_block_job_info(
+ NULL, buffer,
+ protocol_version);
+ continue;
+ }
+ _local_pack_block_job_info(
+ job_ptr, buffer, protocol_version);
+ }
+ list_iterator_destroy(itr);
+ } else if (bg_record->job_ptr
+ && (bg_record->job_ptr->magic == JOB_MAGIC)) {
+ pack32(1, buffer);
+ _local_pack_block_job_info(
+ bg_record->job_ptr, buffer, protocol_version);
+ } else
+ pack32(count, buffer);
+
+ count = NO_VAL;
+
+ packstr(bg_record->linuximage, buffer);
+ packstr(bg_record->mloaderimage, buffer);
+ packstr(bg_record->mp_str, buffer);
+ pack32(bg_record->cnode_cnt, buffer);
+ pack32(bg_record->cnode_err_cnt, buffer);
+ pack16((uint16_t)bg_record->node_use, buffer);
+ packstr(bg_record->ramdiskimage, buffer);
+ packstr(bg_record->reason, buffer);
+ pack16((uint16_t)bg_record->state, buffer);
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
packstr(bg_record->bg_block_id, buffer);
packstr(bg_record->blrtsimage, buffer);
pack_bit_fmt(bg_record->mp_bitmap, buffer);
@@ -465,26 +538,37 @@
pack32(count, buffer);
if (count && count != NO_VAL) {
itr = list_iterator_create(bg_record->job_list);
- while ((job = list_next(itr))) {
- slurm_pack_block_job_info(job, buffer,
- protocol_version);
+ while ((job_ptr = list_next(itr))) {
+ if (job_ptr->magic != JOB_MAGIC) {
+ error("_pack_block 2.3: "
+ "bad magic found when "
+ "packing block %s",
+ bg_record->bg_block_id);
+ list_delete_item(itr);
+ continue;
+ }
+ _local_pack_block_job_info(
+ job_ptr, buffer, protocol_version);
}
list_iterator_destroy(itr);
}
+ if ((count == 1) && running_job)
+ pack32((uint32_t)running_job, buffer);
+ else
+ pack32((uint32_t)bg_record->job_running, buffer);
count = NO_VAL;
- pack32((uint32_t)bg_record->job_running, buffer);
packstr(bg_record->linuximage, buffer);
packstr(bg_record->mloaderimage, buffer);
packstr(bg_record->mp_str, buffer);
- packstr(bg_record->mp_used_str, buffer);
+ packnull(buffer); /* for mp_used_str */
pack32((uint32_t)bg_record->cnode_cnt, buffer);
pack16((uint16_t)bg_record->node_use, buffer);
- packstr(bg_record->user_name, buffer);
+ packnull(buffer); /* for user_name */
packstr(bg_record->ramdiskimage, buffer);
packstr(bg_record->reason, buffer);
pack16((uint16_t)bg_record->state, buffer);
- pack_bit_fmt(bg_record->mp_used_bitmap, buffer);
+ packnull(buffer); /* for mp_used_inx */
} else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
packstr(bg_record->bg_block_id, buffer);
#ifdef HAVE_BGL
@@ -502,7 +586,7 @@
#ifdef HAVE_BGL
pack16((uint16_t)bg_record->node_use, buffer);
#endif
- packstr(bg_record->user_name, buffer);
+ packnull(buffer); /* for user_name */
packstr(bg_record->ramdiskimage, buffer);
packstr(bg_record->reason, buffer);
pack16((uint16_t)bg_record->state, buffer);
@@ -523,7 +607,7 @@
#ifdef HAVE_BGL
pack16((uint16_t)bg_record->node_use, buffer);
#endif
- packstr(bg_record->user_name, buffer);
+ packnull(buffer); /* for user_name */
packstr(bg_record->ramdiskimage, buffer);
pack16((uint16_t)bg_record->state, buffer);
}
@@ -744,6 +828,7 @@
if (_unpack_block_ext(bg_record, buffer, protocol_version)
!= SLURM_SUCCESS) {
+ destroy_bg_record(bg_record);
goto unpack_error;
}
@@ -774,6 +859,7 @@
"around though",
bg_record->bg_block_id);
} else {
+ select_ba_request_t ba_request;
ba_set_removable_mps(usable_mp_bitmap, 1);
/* we want the mps that aren't
* in this record to mark them as used
@@ -790,10 +876,16 @@
#endif
/* info("adding back %s %s", bg_record->bg_block_id, */
/* bg_record->mp_str); */
- name = set_bg_block(results,
- bg_record->start,
- bg_record->geo,
- bg_record->conn_type);
+ memset(&ba_request, 0, sizeof(ba_request));
+ memcpy(ba_request.start, bg_record->start,
+ sizeof(bg_record->start));
+ memcpy(ba_request.geometry, bg_record->geo,
+ sizeof(bg_record->geo));
+ memcpy(ba_request.conn_type, bg_record->conn_type,
+ sizeof(bg_record->conn_type));
+ ba_request.start_req = 1;
+ name = set_bg_block(results, &ba_request);
+
ba_reset_all_removed_mps();
if (!name) {
@@ -849,9 +941,51 @@
slurm_mutex_unlock(&block_state_mutex);
error("Incomplete block data checkpoint file");
free_buf(buffer);
+
return SLURM_FAILURE;
}
+static void _handle_existing_block(bg_record_t *bg_record)
+{
+ char *conn_type;
+ char node_str[256];
+ xassert(bg_record);
+
+ format_node_name(bg_record, node_str, sizeof(node_str));
+ conn_type = conn_type_string_full(bg_record->conn_type);
+ info("Existing: BlockID:%s Nodes:%s Conn:%s",
+ bg_record->bg_block_id, node_str, conn_type);
+ xfree(conn_type);
+ /* Sanity check to make sure we have the correct setup from
+ the save.
+ */
+ if (bg_conf->sub_blocks && bg_record->mp_count == 1) {
+ ba_mp_t *ba_mp = list_peek(bg_record->ba_mp_list);
+ xassert(ba_mp);
+ if (!ba_mp->cnode_bitmap) {
+ error("_handle_existing_block: No cnode_bitmap "
+ "for block %s, creating it",
+ bg_record->bg_block_id);
+ if ((ba_mp->cnode_bitmap =
+ ba_create_ba_mp_cnode_bitmap(bg_record))) {
+ if (!ba_mp->cnode_err_bitmap)
+ ba_mp->cnode_err_bitmap =
+ bit_alloc(bg_conf->
+ mp_cnode_cnt);
+ FREE_NULL_BITMAP(ba_mp->cnode_usable_bitmap);
+ ba_mp->cnode_usable_bitmap =
+ bit_copy(ba_mp->cnode_bitmap);
+ }
+ }
+ }
+
+ if (bg_record->state & BG_BLOCK_ERROR_FLAG)
+ put_block_in_error_state(bg_record, NULL);
+ else if (((bg_record->state == BG_BLOCK_INITED)
+ || (bg_record->state == BG_BLOCK_BOOTING))
+ && !block_ptr_exist_in_list(bg_lists->booted, bg_record))
+ list_push(bg_lists->booted, bg_record);
+}
/*
* _validate_config_blocks - Match slurm configuration information with
@@ -883,7 +1017,8 @@
xassert(found_block_list);
/* read in state from last run. */
- rc = _load_state_file(curr_block_list, dir);
+ if (bg_recover)
+ rc = _load_state_file(curr_block_list, dir);
#ifndef HAVE_BG_FILES
if (rc != SLURM_SUCCESS)
@@ -924,21 +1059,9 @@
list_transfer(bg_lists->main, curr_block_list);
itr_conf = list_iterator_create(bg_lists->main);
- while ((bg_record = list_next(itr_conf))) {
- format_node_name(bg_record, tmp_char,
- sizeof(tmp_char));
- info("Existing: BlockID:%s Nodes:%s Conn:%s",
- bg_record->bg_block_id,
- tmp_char,
- conn_type_string(bg_record->conn_type[0]));
- if (bg_record->state & BG_BLOCK_ERROR_FLAG)
- put_block_in_error_state(bg_record, NULL);
- else if (((bg_record->state == BG_BLOCK_INITED)
- || (bg_record->state == BG_BLOCK_BOOTING))
- && !block_ptr_exist_in_list(bg_lists->booted,
- bg_record))
- list_push(bg_lists->booted, bg_record);
- }
+ while ((bg_record = list_next(itr_conf)))
+ _handle_existing_block(bg_record);
+ list_iterator_destroy(itr_conf);
return SLURM_SUCCESS;
}
@@ -997,19 +1120,7 @@
full_created = 1;
list_push(found_block_list, bg_record);
- format_node_name(bg_record, tmp_char,
- sizeof(tmp_char));
- info("Existing: BlockID:%s Nodes:%s Conn:%s",
- bg_record->bg_block_id,
- tmp_char,
- conn_type_string(bg_record->conn_type[0]));
- if (bg_record->state & BG_BLOCK_ERROR_FLAG)
- put_block_in_error_state(bg_record, NULL);
- else if (((bg_record->state == BG_BLOCK_INITED)
- || (bg_record->state == BG_BLOCK_BOOTING))
- && !block_ptr_exist_in_list(bg_lists->booted,
- bg_record))
- list_push(bg_lists->booted, bg_record);
+ _handle_existing_block(bg_record);
}
}
@@ -1019,24 +1130,12 @@
if (init_bg_record->full_block) {
list_remove(itr_curr);
bg_record = init_bg_record;
+
list_append(bg_lists->main, bg_record);
list_push(found_block_list, bg_record);
- format_node_name(bg_record, tmp_char,
- sizeof(tmp_char));
- info("Existing: BlockID:%s Nodes:%s Conn:%s",
- bg_record->bg_block_id,
- tmp_char,
- conn_type_string(bg_record->conn_type[0]));
- if (bg_record->state & BG_BLOCK_ERROR_FLAG)
- put_block_in_error_state(
- bg_record, NULL);
- else if (((bg_record->state
- == BG_BLOCK_INITED)
- || (bg_record->state
- == BG_BLOCK_BOOTING))
- && !block_ptr_exist_in_list(
- bg_lists->booted, bg_record))
- list_push(bg_lists->booted, bg_record);
+
+ _handle_existing_block(bg_record);
+
break;
}
}
@@ -1059,6 +1158,11 @@
if (!my_list)
fatal("malloc failure on list_create");
+ key_pair = xmalloc(sizeof(config_key_pair_t));
+ key_pair->name = xstrdup("DefaultConnType");
+ key_pair->value = conn_type_string_full(bg_conf->default_conn_type);
+ list_append(my_list, key_pair);
+
#ifndef HAVE_BG_FILES
key_pair = xmalloc(sizeof(config_key_pair_t));
key_pair->name = xstrdup("Emulated");
@@ -1067,6 +1171,11 @@
#endif
key_pair = xmalloc(sizeof(config_key_pair_t));
+ key_pair->name = xstrdup("MaxBlockInError");
+ key_pair->value = xstrdup_printf("%u", bg_conf->max_block_err);
+ list_append(my_list, key_pair);
+
+ key_pair = xmalloc(sizeof(config_key_pair_t));
key_pair->name = xstrdup("MidPlaneNodeCnt");
key_pair->value = xstrdup_printf("%u", bg_conf->mp_cnode_cnt);
list_append(my_list, key_pair);
@@ -1130,6 +1239,11 @@
}
key_pair = xmalloc(sizeof(config_key_pair_t));
+ key_pair->name = xstrdup("IONodesPerMP");
+ key_pair->value = xstrdup_printf("%u", bg_conf->ionodes_per_mp);
+ list_append(my_list, key_pair);
+
+ key_pair = xmalloc(sizeof(config_key_pair_t));
key_pair->name = xstrdup("LayoutMode");
switch(bg_conf->layout_mode) {
case LAYOUT_STATIC:
@@ -1157,10 +1271,19 @@
key_pair->value = xstrdup_printf("%u", bg_conf->nodecard_cnode_cnt);
list_append(my_list, key_pair);
- key_pair = xmalloc(sizeof(config_key_pair_t));
- key_pair->name = xstrdup("IONodesPerMP");
- key_pair->value = xstrdup_printf("%u", bg_conf->ionodes_per_mp);
- list_append(my_list, key_pair);
+ if (bg_conf->sub_blocks) {
+ key_pair = xmalloc(sizeof(config_key_pair_t));
+ key_pair->name = xstrdup("AllowSubBlockAllocations");
+ key_pair->value = xstrdup("Yes");
+ list_append(my_list, key_pair);
+ }
+
+ if (bg_conf->sub_mp_sys) {
+ key_pair = xmalloc(sizeof(config_key_pair_t));
+ key_pair->name = xstrdup("SubMidplaneSystem");
+ key_pair->value = xstrdup("Yes");
+ list_append(my_list, key_pair);
+ }
list_sort(my_list, (ListCmpF) sort_key_pairs);
@@ -1182,7 +1305,7 @@
*/
bg_conf = xmalloc(sizeof(bg_config_t));
/* set some defaults for most systems */
- bg_conf->mp_cnode_cnt = 512;
+ bg_conf->actual_cnodes_per_mp = bg_conf->mp_cnode_cnt = 512;
bg_conf->quarter_cnode_cnt = 128;
bg_conf->nodecard_cnode_cnt = 32;
bg_conf->mp_nodecard_cnt = bg_conf->mp_cnode_cnt
@@ -1263,11 +1386,11 @@
{
int rc = SLURM_SUCCESS;
+ ba_fini();
+
_destroy_bg_config(bg_conf);
_destroy_bg_lists(bg_lists);
- ba_fini();
-
return rc;
}
@@ -1286,6 +1409,9 @@
char *old_file, *new_file, *reg_file;
uint32_t blocks_packed = 0, tmp_offset, block_offset;
Buf buffer = init_buf(BUF_SIZE);
+ slurmctld_lock_t job_read_lock =
+ { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
+
DEF_TIMERS;
debug("bluegene: select_p_state_save");
@@ -1295,6 +1421,9 @@
block_offset = get_buf_offset(buffer);
pack32(blocks_packed, buffer);
+ /* Lock job read before block to avoid deadlock job lock is
+ * needed because we look at the job_ptr's to send job info. */
+ lock_slurmctld(job_read_lock);
/* write block records to buffer */
slurm_mutex_lock(&block_state_mutex);
itr = list_iterator_create(bg_lists->main);
@@ -1310,6 +1439,7 @@
}
list_iterator_destroy(itr);
slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
tmp_offset = get_buf_offset(buffer);
set_buf_offset(buffer, block_offset);
pack32(blocks_packed, buffer);
@@ -1440,6 +1570,7 @@
/* after we have synced the blocks then we say they are
created. */
blocks_are_created = 1;
+
return rc;
#else
return SLURM_ERROR;
@@ -1452,12 +1583,70 @@
}
/* All initialization is performed by init() */
-extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
+extern int select_p_node_init(struct node_record *node_ptr_array, int node_cnt)
{
#ifdef HAVE_BG
- if (node_cnt>0 && bg_conf)
- if (node_ptr->cpus >= bg_conf->mp_cnode_cnt)
- bg_conf->cpus_per_mp = node_ptr->cpus;
+ int i = 0;
+ uint32_t real_memory, threads, cores;
+
+ if (!node_ptr_array)
+ return SLURM_SUCCESS;
+
+ xassert(bg_conf);
+
+ /* we need the amount of memory for a midplane */
+ real_memory = bg_conf->mp_cnode_cnt;
+
+ /* Set up some knowns that perhaps aren't all the way
+ in the slurm.conf.
+ */
+#ifdef HAVE_BGL
+ threads = 1;
+ cores = 2;
+ real_memory *= 512;
+#elif defined HAVE_BGP
+ threads = 1;
+ cores = 4;
+ real_memory *= 2048;
+#else
+ /* BGQ */
+ threads = 4;
+ cores = 16;
+ real_memory *= 16384;
+#endif
+
+ bg_conf->cpus_per_mp = bg_conf->mp_cnode_cnt * cores;
+
+ for (i = 0; i < node_cnt; i++) {
+ struct node_record *node_ptr = &node_ptr_array[i];
+ select_nodeinfo_t *nodeinfo = NULL;
+
+ if (!node_ptr->name)
+ continue;
+
+ node_ptr->threads = threads;
+ node_ptr->cores = cores;
+ node_ptr->sockets = bg_conf->mp_cnode_cnt;
+ node_ptr->config_ptr->cpus = node_ptr->cpus =
+ bg_conf->cpus_per_mp;
+ node_ptr->real_memory = real_memory;
+
+ xassert(node_ptr->select_nodeinfo);
+ nodeinfo = node_ptr->select_nodeinfo->data;
+ xassert(nodeinfo);
+
+ slurm_mutex_lock(&ba_system_mutex);
+ if (!(nodeinfo->ba_mp = str2ba_mp(node_ptr->name))) {
+ slurm_mutex_unlock(&ba_system_mutex);
+ continue;
+ }
+ nodeinfo->ba_mp->index = i;
+ if (IS_NODE_DOWN(node_ptr) || IS_NODE_DRAIN(node_ptr))
+ ba_update_mp_state(
+ nodeinfo->ba_mp, node_ptr->node_state);
+ nodeinfo->ba_mp->state = node_ptr->node_state;
+ slurm_mutex_unlock(&ba_system_mutex);
+ }
return SLURM_SUCCESS;
#else
@@ -1475,6 +1664,7 @@
/* select_p_node_init needs to be called before this to set
this up correctly
*/
+
if (read_bg_conf() == SLURM_ERROR) {
fatal("Error, could not read the file");
return SLURM_ERROR;
@@ -1484,6 +1674,34 @@
struct part_record *part_ptr = NULL;
ListIterator itr = list_iterator_create(part_list);
while ((part_ptr = list_next(itr))) {
+ char *this_node_name;
+ hostlist_t host_list;
+ part_ptr->total_cpus = 0;
+ if (!part_ptr->nodes) /* no nodes in partition */
+ continue;
+
+ if (!(host_list = hostlist_create(part_ptr->nodes))) {
+ error("hostlist_create error on %s, %m",
+ part_ptr->nodes);
+ continue;
+ }
+
+ while ((this_node_name = hostlist_shift(host_list))) {
+ struct node_record *node_ptr =
+ find_node_record(this_node_name);
+ if (node_ptr == NULL) {
+ error("select_p_block_init: "
+ "invalid node name %s",
+ this_node_name);
+ free(this_node_name);
+ hostlist_destroy(host_list);
+ continue;
+ }
+ free(this_node_name);
+ part_ptr->total_cpus += node_ptr->cpus;
+ }
+ hostlist_destroy(host_list);
+
part_ptr->max_nodes = part_ptr->max_nodes_orig;
part_ptr->min_nodes = part_ptr->min_nodes_orig;
select_p_alter_node_cnt(SELECT_SET_MP_CNT,
@@ -1493,6 +1711,7 @@
}
list_iterator_destroy(itr);
}
+
return SLURM_SUCCESS;
#else
return SLURM_ERROR;
@@ -1571,28 +1790,40 @@
if (bg_record) {
uint32_t job_id = NO_JOB_RUNNING, uid = NO_VAL;
- if (bg_record->job_list) {
- block_job_info_t *job_info;
+ struct job_record *found_job_ptr = NULL;
+
+ if (bg_record->job_list
+ && list_count(bg_record->job_list)) {
ListIterator itr = list_iterator_create(
bg_record->job_list);
xassert(itr);
- while ((job_info = list_next(itr))) {
- if (job_info->job_id
- == job_ptr->job_id) {
- job_id = job_info->job_id;
- uid = job_info->user_id;
- break;
+ while ((found_job_ptr = list_next(itr))) {
+ if (found_job_ptr->magic != JOB_MAGIC) {
+ error("select_p_job_ready: "
+ "bad magic found when "
+ "looking at job %u",
+ job_ptr->job_id);
+ list_delete_item(itr);
+ continue;
}
+
+ if (found_job_ptr->job_id
+ == job_ptr->job_id)
+ break;
}
list_iterator_destroy(itr);
- } else {
- uid = bg_record->user_uid;
- job_id = bg_record->job_running;
+ } else if (bg_record->job_ptr)
+ found_job_ptr = bg_record->job_ptr;
+
+ if (found_job_ptr) {
+ job_id = found_job_ptr->job_id;
+ uid = found_job_ptr->user_id;
}
if (job_id != job_ptr->job_id) {
rc = 0;
- } else if ((uid == job_ptr->user_id)
+ } else if (!bg_record->free_cnt
+ && (uid == job_ptr->user_id)
&& (bg_record->state == BG_BLOCK_INITED)) {
/* Clear the state just incase we
* missed it somehow. */
@@ -1603,7 +1834,6 @@
rc = 0;
else
rc = READY_JOB_ERROR; /* try again */
-
} else {
/* This means the block has been removed and
is no longer valid. This could happen
@@ -1672,11 +1902,12 @@
uint32_t node_count)
{
bitstr_t *picked_mps = NULL;
- bitstr_t *avail_mps = NULL;
bg_record_t *bg_record = NULL;
- char *tmp_char = NULL, *tmp_char2 = NULL;
+ char *tmp_char = NULL;
ba_mp_t *ba_mp = NULL;
select_jobinfo_t *jobinfo = NULL;
+ int dim;
+
xassert(job_ptr);
slurm_mutex_lock(&block_state_mutex);
@@ -1688,30 +1919,52 @@
"assigned to it, but for some reason we are "
"trying to start a step on it?",
job_ptr->job_id);
-
- xassert(bg_record->mp_used_bitmap);
- xassert(!step_jobinfo->units_used);
-
- if (!(avail_mps = bit_copy(bg_record->mp_used_bitmap)))
- fatal("bit_copy malloc failure");
-
- if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
- tmp_char = bitmap2node_name(avail_mps);
- info("select_p_step_pick_nodes: looking to run a new "
- "step for job %u requesting %u nodes on block %s with "
- "used midplanes %s", job_ptr->job_id, node_count,
- bg_record->bg_block_id, tmp_char);
- xfree(tmp_char);
+ else if (bg_record->magic != BLOCK_MAGIC) {
+ bg_record = find_bg_record_in_list(
+ bg_lists->main, jobinfo->bg_block_id);
+ if (!bg_record || (bg_record->magic != BLOCK_MAGIC)) {
+ error("select_p_step_pick_nodes: "
+ "Whoa, some how we got a bad block for job %u, "
+ "it should be %s but we couldn't find "
+ "it on the system, no step for you, "
+ "and ending job.",
+ job_ptr->job_id, jobinfo->bg_block_id);
+ slurm_mutex_unlock(&block_state_mutex);
+ bg_requeue_job(job_ptr->job_id, 0, 1);
+ return NULL;
+ }
+ error("select_p_step_pick_nodes: Whoa, some how we got a "
+ "bad block for job %u, it should be %s "
+ "(we found it so no big deal, but strange)",
+ job_ptr->job_id, jobinfo->bg_block_id);
+ jobinfo->bg_record = bg_record;
+ } else if ((bg_record->action == BG_BLOCK_ACTION_FREE)
+ && (bg_record->state == BG_BLOCK_INITED)) {
+ /* If we are in the action state of
+ FREE of 'D' since the block won't be able to run any future
+ jobs on it.
+ */
+ info("select_p_step_pick_nodes: "
+ "Already selected block %s can't be used, "
+ "it has an action item of 'D' on it, ending job %u.",
+ bg_record->bg_block_id, job_ptr->job_id);
+ slurm_mutex_unlock(&block_state_mutex);
+ bg_requeue_job(job_ptr->job_id, 0, 1);
+ return NULL;
}
+ xassert(!step_jobinfo->units_used);
+
xfree(step_jobinfo->bg_block_id);
step_jobinfo->bg_block_id = xstrdup(bg_record->bg_block_id);
step_jobinfo->block_cnode_cnt = bg_record->cnode_cnt;
if (((cluster_flags & CLUSTER_FLAG_BGL)
|| (cluster_flags & CLUSTER_FLAG_BGP))
- || (node_count == bg_record->cnode_cnt)) {
- /* If we are using the whole block we need to verify
+ || ((node_count == bg_record->cnode_cnt)
+ || (node_count > bg_conf->mp_cnode_cnt))) {
+ /* If we are using the whole block (or more than 1
+ midplane of it) we need to verify
if anything else is used. If anything else is used
return NULL, else return that we can use the entire
thing.
@@ -1719,10 +1972,11 @@
big the step is since you can only run 1 step per block.
*/
step_jobinfo->dim_cnt = jobinfo->dim_cnt;
- if (bit_ffs(avail_mps) != -1) {
+ if (list_count(job_ptr->step_list)) {
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
info("select_p_step_pick_nodes: Looking "
- "for the entire block %s for job %u, "
+ "for more than one midplane of "
+ "block %s for job %u, "
"but some of it is used.",
bg_record->bg_block_id, job_ptr->job_id);
goto end_it;
@@ -1730,29 +1984,94 @@
if (!(picked_mps = bit_copy(job_ptr->node_bitmap)))
fatal("bit_copy malloc failure");
- if (cluster_flags & CLUSTER_FLAG_BGQ
- && (bg_record->mp_count == 1)) {
- ba_mp = list_peek(bg_record->ba_mp_list);
- xassert(ba_mp);
- if (!ba_mp->cnode_bitmap)
- ba_mp->cnode_bitmap =
+ if (cluster_flags & CLUSTER_FLAG_BGQ) {
+ bitstr_t *used_bitmap;
+ if (node_count > bg_conf->mp_cnode_cnt) {
+ /* Here we have to make sure nothing
+ else is able to run on this block
+ since we are using more than 1
+ midplane but potentially not the
+ entire allocation.
+ */
+ FREE_NULL_BITMAP(jobinfo->units_avail);
+ FREE_NULL_BITMAP(jobinfo->units_used);
+ jobinfo->units_avail =
ba_create_ba_mp_cnode_bitmap(bg_record);
- step_jobinfo->units_used =
- bit_copy(ba_mp->cnode_bitmap);
+ jobinfo->units_used =
+ bit_copy(jobinfo->units_avail);
+ }
+
+ if (jobinfo->units_avail)
+ used_bitmap = jobinfo->units_used;
+ else {
+ ba_mp = list_peek(bg_record->ba_mp_list);
+ xassert(ba_mp);
+ if (!ba_mp->cnode_bitmap)
+ ba_mp->cnode_bitmap =
+ ba_create_ba_mp_cnode_bitmap(
+ bg_record);
+ used_bitmap = ba_mp->cnode_bitmap;
+ }
+ /* units_used and units_avail will be the
+ same, the exact opposite of used_bitmap.
+ */
+ step_jobinfo->units_used = bit_copy(used_bitmap);
bit_not(step_jobinfo->units_used);
- bit_or(ba_mp->cnode_bitmap, step_jobinfo->units_used);
+ step_jobinfo->units_avail =
+ bit_copy(step_jobinfo->units_used);
+ bit_or(used_bitmap, step_jobinfo->units_used);
}
- bit_or(bg_record->mp_used_bitmap, picked_mps);
step_jobinfo->ionode_str = xstrdup(jobinfo->ionode_str);
- goto found_it;
- } else if ((ba_mp = ba_pick_sub_block_cnodes(
- bg_record, &node_count,
- step_jobinfo))) {
- int dim;
+ } else if (jobinfo->units_avail) {
+ bitstr_t *total_bitmap = jobinfo->units_used;
+ ba_mp = list_peek(bg_record->ba_mp_list);
+ xassert(ba_mp);
+ if (ba_mp->cnode_err_bitmap) {
+ total_bitmap = bit_copy(jobinfo->units_used);
+ bit_or(total_bitmap, ba_mp->cnode_err_bitmap);
+ }
+ /* handle a sub-block allocation where the allocation
+ itself if a small block.
+ */
+ step_jobinfo->cnode_cnt = node_count;
+ if (!(ba_sub_block_in_bitmap(step_jobinfo, total_bitmap, 1))) {
+ if (total_bitmap != jobinfo->units_used)
+ FREE_NULL_BITMAP(total_bitmap);
+ goto end_it;
+ }
+
+ if (total_bitmap != jobinfo->units_used)
+ FREE_NULL_BITMAP(total_bitmap);
+
+ node_count = step_jobinfo->cnode_cnt;
+ if (!(picked_mps = bit_copy(job_ptr->node_bitmap)))
+ fatal("bit_copy malloc failure");
+ bit_or(jobinfo->units_used, step_jobinfo->units_used);
+ for (dim = 0; dim < step_jobinfo->dim_cnt; dim++) {
+ /* The IBM software works off a relative
+ position in the block instead of the
+ absolute position used in SLURM.
+ Since conn_type doesn't mean anything for a
+ step we can just overload it since it is getting
+ sent aready and we don't need to bloat
+ anything if we don't have to.
+
+ So setting it here we can have both
+ absolute and relative.
+
+ We don't need to add here since we are
+ always only dealing with a block that is 1
+ midplane or less.
+ */
+ step_jobinfo->conn_type[dim] =
+ step_jobinfo->start_loc[dim]
+ - bg_record->start_small[dim];
+ }
+ } else if ((ba_mp = ba_sub_block_in_record(
+ bg_record, &node_count, step_jobinfo))) {
if (!(picked_mps = bit_alloc(bit_size(job_ptr->node_bitmap))))
fatal("bit_copy malloc failure");
- bit_set(bg_record->mp_used_bitmap, ba_mp->index);
bit_set(picked_mps, ba_mp->index);
for (dim = 0; dim < step_jobinfo->dim_cnt; dim++) {
/* The IBM software works off a relative
@@ -1762,32 +2081,33 @@
step we can just overload it since it is getting
sent aready and we don't need to bloat
anything if we don't have to.
+
So setting it here we can have both
absolute and relative.
+
+ We add here since if not using the first
+ midplane we have already setup the
+ conn_type to point to the starting point of
+ the relative position in the block.
*/
- step_jobinfo->conn_type[dim] =
+ step_jobinfo->conn_type[dim] +=
step_jobinfo->start_loc[dim]
- - jobinfo->start_loc[dim];
+ - bg_record->start_small[dim];
}
}
-found_it:
if (picked_mps) {
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
tmp_char = bitmap2node_name(picked_mps);
- tmp_char2 = bitmap2node_name(bg_record->mp_used_bitmap);
- info("select_p_step_pick_nodes: picked %s mps on "
- "block %s used is now %s",
- tmp_char, bg_record->bg_block_id,
- tmp_char2);
+ info("select_p_step_pick_nodes: new step for job %u "
+ "will be running on %s(%s)",
+ job_ptr->job_id, bg_record->bg_block_id, tmp_char);
xfree(tmp_char);
- xfree(tmp_char2);
}
step_jobinfo->cnode_cnt = node_count;
}
end_it:
- FREE_NULL_BITMAP(avail_mps);
slurm_mutex_unlock(&block_state_mutex);
@@ -1797,42 +2117,72 @@
extern int select_p_step_finish(struct step_record *step_ptr)
{
bg_record_t *bg_record = NULL;
- select_jobinfo_t *jobinfo = NULL;
+ select_jobinfo_t *jobinfo = NULL, *step_jobinfo = NULL;
int rc = SLURM_SUCCESS;
- char *tmp_char = NULL, *tmp_char2 = NULL;
+ char *tmp_char = NULL;
xassert(step_ptr);
- slurm_mutex_lock(&block_state_mutex);
+
+ if (IS_JOB_COMPLETING(step_ptr->job_ptr)) {
+ debug("step completion %u.%u was received after job "
+ "allocation is already completing, no cleanup needed",
+ step_ptr->job_ptr->job_id, step_ptr->step_id);
+ return SLURM_SUCCESS;
+ }
jobinfo = step_ptr->job_ptr->select_jobinfo->data;
- bg_record = jobinfo->bg_record;
+ step_jobinfo = step_ptr->select_jobinfo->data;
- if (!bg_record)
- fatal("This step %u.%u does not have a bg block "
- "assigned to it, but for some reason we are "
- "trying to end the step?",
- step_ptr->job_ptr->job_id, step_ptr->step_id);
- /* At this moment the step_node_bitmap has already been
- cleared and the step_node_bitmap has been set so use it
- instead.
- */
- bit_not(step_ptr->step_node_bitmap);
- bit_and(bg_record->mp_used_bitmap, step_ptr->step_node_bitmap);
- bit_not(step_ptr->step_node_bitmap);
+ if (step_jobinfo->cnode_cnt > bg_conf->mp_cnode_cnt) {
+ /* This means we were using units_avail and units_used
+ as midplanes not cnodes for either the whole job
+ allocation or a portion of it.
+ */
+ FREE_NULL_BITMAP(jobinfo->units_avail);
+ FREE_NULL_BITMAP(jobinfo->units_used);
+ } else if (jobinfo->units_avail)
+ rc = ba_sub_block_in_bitmap_clear(
+ step_jobinfo, jobinfo->units_used);
+ else {
+ slurm_mutex_lock(&block_state_mutex);
+ bg_record = jobinfo->bg_record;
+
+ if (!bg_record)
+ fatal("This step %u.%u does not have a bg block "
+ "assigned to it, but for some reason we are "
+ "trying to end the step?",
+ step_ptr->job_ptr->job_id, step_ptr->step_id);
+ else if (bg_record->magic != BLOCK_MAGIC) {
+ bg_record = find_bg_record_in_list(
+ bg_lists->main, jobinfo->bg_block_id);
+ if (!bg_record || (bg_record->magic != BLOCK_MAGIC)) {
+ error("select_p_step_finish: "
+ "Whoa, some how we got a bad block "
+ "for job %u, it should be %s but "
+ "we couldn't find it on the system, "
+ "so no real need to clear it up.",
+ step_ptr->job_ptr->job_id,
+ jobinfo->bg_block_id);
+ slurm_mutex_unlock(&block_state_mutex);
+ return SLURM_ERROR;
+ }
+ error("select_p_step_finish: Whoa, some how we "
+ "got a bad block for job %u, it should be %s "
+ "(we found it so no big deal, but strange)",
+ step_ptr->job_ptr->job_id, jobinfo->bg_block_id);
+ jobinfo->bg_record = bg_record;
+ }
+ rc = ba_sub_block_in_record_clear(bg_record, step_ptr);
+ slurm_mutex_unlock(&block_state_mutex);
+ }
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
- tmp_char = bitmap2node_name(bg_record->mp_used_bitmap);
- tmp_char2 = bitmap2node_name(step_ptr->step_node_bitmap);
- info("select_p_step_finish: cleared %s "
- "from job %u, now %s used",
- tmp_char2, step_ptr->job_ptr->job_id, tmp_char);
+ tmp_char = bitmap2node_name(step_ptr->step_node_bitmap);
+ info("select_p_step_finish: step %u.%u cleared from %s",
+ step_ptr->job_ptr->job_id, step_ptr->step_id, tmp_char);
xfree(tmp_char);
- xfree(tmp_char2);
}
- rc = ba_clear_sub_block_cnodes(bg_record, step_ptr);
-
- slurm_mutex_unlock(&block_state_mutex);
return rc;
}
@@ -1861,6 +2211,14 @@
if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
if (bg_lists->main) {
+ slurmctld_lock_t job_read_lock =
+ { NO_LOCK, READ_LOCK,
+ NO_LOCK, NO_LOCK };
+ /* Lock job read before block to avoid
+ * deadlock job lock is needed because
+ * we look at the job_ptr's to send
+ * job info. */
+ lock_slurmctld(job_read_lock);
slurm_mutex_lock(&block_state_mutex);
itr = list_iterator_create(bg_lists->main);
while ((bg_record = list_next(itr))) {
@@ -1872,6 +2230,7 @@
}
list_iterator_destroy(itr);
slurm_mutex_unlock(&block_state_mutex);
+ unlock_slurmctld(job_read_lock);
} else {
error("select_p_pack_select_info: "
"no bg_lists->main");
@@ -1922,6 +2281,9 @@
extern int select_p_select_nodeinfo_set_all(time_t last_query_time)
{
+ if (bg_recover != NOT_FROM_CONTROLLER)
+ bridge_status_init();
+
return select_nodeinfo_set_all(last_query_time);
}
@@ -1998,6 +2360,9 @@
int rc = SLURM_SUCCESS;
bg_record_t *bg_record = NULL;
char reason[200];
+ List kill_job_list = NULL;
+ kill_job_struct_t *freeit;
+ ListIterator itr;
if (!block_desc_ptr->bg_block_id) {
error("update_block: No name specified");
@@ -2008,6 +2373,8 @@
bg_record = find_bg_record_in_list(bg_lists->main,
block_desc_ptr->bg_block_id);
if (!bg_record) {
+ error("update_block: block %s not found",
+ block_desc_ptr->bg_block_id);
slurm_mutex_unlock(&block_state_mutex);
return ESLURM_INVALID_BLOCK_NAME;
}
@@ -2047,10 +2414,38 @@
bg_block_state_string(state));
}
- /* First fail any job running on this block */
- if (bg_record->job_running > NO_JOB_RUNNING) {
+ /* First fail any job running on this block (Not for resume though) */
+ if (block_desc_ptr->state != BG_BLOCK_TERM) {
+ if (bg_record->job_running > NO_JOB_RUNNING) {
+ if (!kill_job_list)
+ kill_job_list =
+ bg_status_create_kill_job_list();
+ freeit = xmalloc(sizeof(kill_job_struct_t));
+ freeit->jobid = bg_record->job_running;
+ list_push(kill_job_list, freeit);
+ } else if (bg_record->job_list
+ && list_count(bg_record->job_list)) {
+ struct job_record *job_ptr;
+ if (!kill_job_list)
+ kill_job_list =
+ bg_status_create_kill_job_list();
+ itr = list_iterator_create(bg_record->job_list);
+ while ((job_ptr = list_next(itr))) {
+ if (job_ptr->magic != JOB_MAGIC)
+ continue;
+ freeit = xmalloc(sizeof(kill_job_struct_t));
+ freeit->jobid = job_ptr->job_id;
+ list_push(kill_job_list, freeit);
+ }
+ list_iterator_destroy(itr);
+ }
+ }
+
+ if (kill_job_list) {
slurm_mutex_unlock(&block_state_mutex);
- bg_requeue_job(bg_record->job_running, 0);
+ bg_status_process_kill_job_list(kill_job_list, 0);
+ list_destroy(kill_job_list);
+ kill_job_list = NULL;
slurm_mutex_lock(&block_state_mutex);
if (!block_ptr_exist_in_list(bg_lists->main, bg_record)) {
slurm_mutex_unlock(&block_state_mutex);
@@ -2058,18 +2453,13 @@
"error state it disappeared");
return SLURM_ERROR;
}
- /* need to set the job_ptr to NULL
- here or we will get error message
- about us trying to free this block
- with a job in it.
- */
- bg_record->job_ptr = NULL;
}
if (block_desc_ptr->state == BG_BLOCK_ERROR_FLAG) {
bg_record_t *found_record = NULL;
- ListIterator itr;
List delete_list = list_create(NULL);
+ bool delete_it = 0;
+
/* This loop shouldn't do much in regular Dynamic mode
since there shouldn't be overlapped blocks. But if
there is a trouble block that isn't going away and
@@ -2116,11 +2506,14 @@
found_record->bg_block_id,
bg_record->bg_block_id);
}
+ resume_block(found_record);
list_push(delete_list, found_record);
}
list_iterator_destroy(itr);
slurm_mutex_unlock(&block_state_mutex);
- free_block_list(NO_VAL, delete_list, 0, 0);
+ if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
+ delete_it = 1;
+ free_block_list(NO_VAL, delete_list, delete_it, 0);
list_destroy(delete_list);
put_block_in_error_state(bg_record, reason);
} else if (block_desc_ptr->state == BG_BLOCK_FREE) {
@@ -2176,14 +2569,15 @@
info("Pending job %u on block %s "
"will try to be requeued "
"because overlapping block %s "
- "is in an error state.",
+ "is being removed.",
found_record->job_running,
found_record->bg_block_id,
bg_record->bg_block_id);
else
- info("Failing job %u on block %s "
+ info("Running job %u on block %s "
+ "will try to be requeued "
"because overlapping block %s "
- "is in an error state.",
+ "is being removed.",
found_record->job_running,
found_record->bg_block_id,
bg_record->bg_block_id);
@@ -2191,6 +2585,45 @@
free_block_list code below, just
make note of it here.
*/
+ } else if (found_record->job_list &&
+ list_count(found_record->job_list)) {
+ struct job_record *job_ptr = NULL;
+ ListIterator job_itr = list_iterator_create(
+ found_record->job_list);
+ while ((job_ptr = list_next(job_itr))) {
+ if (job_ptr->magic != JOB_MAGIC) {
+ error("select_p_update_block: "
+ "bad magic found when "
+ "looking at block %s",
+ found_record->
+ bg_block_id);
+ list_delete_item(itr);
+ continue;
+ }
+ if (IS_JOB_CONFIGURING(job_ptr))
+ info("Pending job %u on "
+ "block %s "
+ "will try to be requeued "
+ "because related block %s "
+ "is in an error state.",
+ job_ptr->job_id,
+ found_record->bg_block_id,
+ bg_record->bg_block_id);
+ else
+ info("Running job %u on "
+ "block %s "
+ "will try to be requeued "
+ "because related block %s "
+ "is being removed.",
+ job_ptr->job_id,
+ found_record->bg_block_id,
+ bg_record->bg_block_id);
+ /* This job will be requeued in the
+ free_block_list code below, just
+ make note of it here.
+ */
+ }
+ list_iterator_destroy(job_itr);
} else {
debug2("block %s is part of to be freed %s "
"but no running job",
@@ -2212,7 +2645,7 @@
list_iterator_destroy(itr);
slurm_mutex_unlock(&block_state_mutex);
- free_block_list(NO_VAL, delete_list, 0, 0);
+ free_block_list(NO_VAL, delete_list, 1, 0);
list_destroy(delete_list);
} else if (block_desc_ptr->state == BG_BLOCK_BOOTING) {
/* This means recreate the block, remove it and then
@@ -2262,7 +2695,8 @@
bg_err_str(rc));
}
} else
- if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
+ if (bg_conf->slurm_debug_flags
+ & DEBUG_FLAG_SELECT_TYPE)
info("select_p_update_block: done %s",
(char *)bg_record->bg_block_id);
#endif
@@ -2408,7 +2842,8 @@
/* find first bit in nc */
int start_io =
(int)nc_pos * bg_conf->io_ratio;
- down_nodecard(node_name, start_io, 0);
+ down_nodecard(node_name, start_io,
+ 0, NULL);
last_pos = nc_pos;
}
}
@@ -2435,6 +2870,121 @@
#endif
}
+/* While the realtime server should get all the cnode state changes on
+ * older versions of the IBM driver if a job has a timeout it doesn't
+ * always happen. So what happens is the runjob_mux will now send a
+ * nice cancel to the slurmctld to make sure it gets marked.
+ */
+extern int select_p_fail_cnode(struct step_record *step_ptr)
+{
+#if defined HAVE_BG && !defined HAVE_BG_L_P
+ bg_record_t *bg_record;
+ select_nodeinfo_t *nodeinfo;
+ select_jobinfo_t *jobinfo;
+ select_jobinfo_t *step_jobinfo;
+ struct node_record *node_ptr = NULL;
+ ListIterator itr, itr2;
+ ba_mp_t *ba_mp = NULL, *found_ba_mp;
+ int i;
+
+ xassert(step_ptr);
+
+ jobinfo = step_ptr->job_ptr->select_jobinfo->data;
+ step_jobinfo = step_ptr->select_jobinfo->data;
+
+ /* block_state must be locked before ba_system */
+ slurm_mutex_lock(&block_state_mutex);
+ slurm_mutex_lock(&ba_system_mutex);
+ for (i=0; i<bit_size(step_ptr->step_node_bitmap); i++) {
+ if (!bit_test(step_ptr->step_node_bitmap, i))
+ continue;
+ ba_mp = ba_inx2ba_mp(i);
+ xassert(ba_mp);
+
+ if (!ba_mp->cnode_err_bitmap)
+ ba_mp->cnode_err_bitmap =
+ bit_alloc(bg_conf->mp_cnode_cnt);
+
+ if (jobinfo->units_avail) {
+ bit_or(ba_mp->cnode_err_bitmap,
+ step_jobinfo->units_used);
+ } else {
+ bit_nset(ba_mp->cnode_err_bitmap, 0,
+ bit_size(ba_mp->cnode_err_bitmap)-1);
+ }
+ node_ptr = &(node_record_table_ptr[ba_mp->index]);
+ xassert(node_ptr->select_nodeinfo);
+ nodeinfo = (select_nodeinfo_t *)node_ptr->select_nodeinfo->data;
+ xassert(nodeinfo);
+ xfree(nodeinfo->failed_cnodes);
+ nodeinfo->failed_cnodes = ba_node_map_ranged_hostlist(
+ ba_mp->cnode_err_bitmap, ba_mp_geo_system);
+ }
+
+ if (!ba_mp) {
+ error("select_p_fail_cnode: no ba_mp? "
+ "This should never happen");
+ slurm_mutex_unlock(&ba_system_mutex);
+ slurm_mutex_unlock(&block_state_mutex);
+ return SLURM_ERROR;
+ }
+
+ itr = list_iterator_create(bg_lists->main);
+ while ((bg_record = (bg_record_t *)list_next(itr))) {
+ float err_ratio;
+ if (!bit_overlap(step_ptr->step_node_bitmap,
+ bg_record->mp_bitmap))
+ continue;
+ bg_record->cnode_err_cnt = 0;
+ itr2 = list_iterator_create(bg_record->ba_mp_list);
+ while ((found_ba_mp = (ba_mp_t *)list_next(itr2))) {
+
+ if (!found_ba_mp->used
+ || !bit_test(step_ptr->step_node_bitmap,
+ found_ba_mp->index))
+ continue;
+
+ /* perhaps this block isn't involved in this
+ error */
+ if (jobinfo->units_avail
+ && found_ba_mp->cnode_usable_bitmap
+ && bit_overlap(found_ba_mp->cnode_usable_bitmap,
+ ba_mp->cnode_err_bitmap))
+ continue;
+
+ if (!found_ba_mp->cnode_err_bitmap)
+ found_ba_mp->cnode_err_bitmap =
+ bit_alloc(bg_conf->mp_cnode_cnt);
+
+ bit_or(found_ba_mp->cnode_err_bitmap,
+ ba_mp->cnode_err_bitmap);
+ bg_record->cnode_err_cnt +=
+ bit_set_count(found_ba_mp->cnode_err_bitmap);
+ }
+ list_iterator_destroy(itr2);
+
+ err_ratio = (float)bg_record->cnode_err_cnt
+ / (float)bg_record->cnode_cnt;
+ bg_record->err_ratio = err_ratio * 100;
+
+ /* handle really small ratios */
+ if (!bg_record->err_ratio && bg_record->cnode_err_cnt)
+ bg_record->err_ratio = 1;
+
+ debug("select_p_fail_cnode: "
+ "count in error for %s is %u with ratio at %u",
+ bg_record->bg_block_id,
+ bg_record->cnode_err_cnt,
+ bg_record->err_ratio);
+
+ }
+ list_iterator_destroy(itr);
+ slurm_mutex_unlock(&ba_system_mutex);
+ slurm_mutex_unlock(&block_state_mutex);
+#endif
+ return SLURM_SUCCESS;
+}
+
extern int select_p_get_info_from_plugin (enum select_plugindata_info dinfo,
struct job_record *job_ptr,
void *data)
@@ -2450,10 +3000,10 @@
*tmp32 = 0;
break;
case SELECT_STATIC_PART:
- if (bg_conf->layout_mode == LAYOUT_STATIC)
- *tmp16 = 1;
- else
+ if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
*tmp16 = 0;
+ else /* LAYOUT_STATIC || LAYOUT_OVERLAP */
+ *tmp16 = 1;
break;
case SELECT_CONFIG_INFO:
@@ -2485,14 +3035,17 @@
{
#ifdef HAVE_BG
ba_mp_t *curr_mp;
+ int rc = SLURM_SUCCESS;
xassert(node_ptr);
- if(!(curr_mp = str2ba_mp(node_ptr->name)))
- return SLURM_ERROR;
-
- ba_update_mp_state(curr_mp, node_ptr->node_state);
- return SLURM_SUCCESS;
+ slurm_mutex_lock(&ba_system_mutex);
+ if ((curr_mp = str2ba_mp(node_ptr->name)))
+ ba_update_mp_state(curr_mp, node_ptr->node_state);
+ else
+ rc = SLURM_ERROR;
+ slurm_mutex_unlock(&ba_system_mutex);
+ return rc;
#else
return SLURM_ERROR;
#endif
@@ -2514,8 +3067,12 @@
switch (type) {
case SELECT_GET_NODE_SCALING:
- if ((*nodes) != INFINITE)
- (*nodes) = bg_conf->mp_cnode_cnt;
+ if ((*nodes) != INFINITE) {
+ if (bg_conf->sub_mp_sys)
+ (*nodes) = bg_conf->actual_cnodes_per_mp;
+ else
+ (*nodes) = bg_conf->mp_cnode_cnt;
+ }
break;
case SELECT_GET_NODE_CPU_CNT:
if ((*cpus) != (uint16_t)INFINITE)
@@ -2544,11 +3101,18 @@
* don't scale up this value. */
break;
}
- (*nodes) *= bg_conf->mp_cnode_cnt;
+ if (bg_conf->sub_mp_sys)
+ (*nodes) = bg_conf->actual_cnodes_per_mp;
+ else
+ (*nodes) *= bg_conf->mp_cnode_cnt;
break;
case SELECT_APPLY_NODE_MAX_OFFSET:
- if ((*nodes) != INFINITE)
- (*nodes) *= bg_conf->mp_cnode_cnt;
+ if ((*nodes) != INFINITE) {
+ if (bg_conf->sub_mp_sys)
+ (*nodes) = bg_conf->actual_cnodes_per_mp;
+ else
+ (*nodes) *= bg_conf->mp_cnode_cnt;
+ }
break;
case SELECT_SET_NODE_CNT:
get_select_jobinfo(job_desc->select_jobinfo->data,
@@ -2562,6 +3126,14 @@
if (job_desc->min_nodes == (uint32_t) NO_VAL)
return SLURM_SUCCESS;
+ else if ((job_desc->min_nodes == 1)
+ && (job_desc->min_cpus != NO_VAL)) {
+ job_desc->min_nodes = job_desc->min_cpus;
+ if (job_desc->ntasks_per_node
+ && job_desc->ntasks_per_node != (uint16_t)NO_VAL)
+ job_desc->min_nodes /=
+ job_desc->ntasks_per_node;
+ }
get_select_jobinfo(job_desc->select_jobinfo->data,
SELECT_JOBDATA_GEOMETRY, &req_geometry);
@@ -2580,9 +3152,50 @@
set min_nodes correctly
*/
if ((job_desc->min_cpus != NO_VAL)
- && (job_desc->min_cpus > job_desc->min_nodes))
- job_desc->min_nodes =
- job_desc->min_cpus / bg_conf->cpu_ratio;
+ && (job_desc->min_cpus > job_desc->min_nodes)) {
+ float tmp_float = (float)job_desc->min_cpus
+ / (float)bg_conf->cpu_ratio;
+
+ tmp = (uint32_t)tmp_float;
+ if (tmp_float != (float)tmp)
+ tmp++;
+ if (tmp > job_desc->min_nodes) {
+ /* This means they actually asked for
+ nodes and tasks.
+ */
+ if ((job_desc->max_nodes != NO_VAL)
+ && (tmp > job_desc->max_nodes)) {
+#ifndef HAVE_BG_L_P
+ float divisor = 0;
+ /* ntasks_per_node should be
+ * validated beforehand. */
+ if (job_desc->ntasks_per_node
+ && (job_desc->ntasks_per_node
+ != (uint16_t)NO_VAL))
+ divisor = (float)job_desc->
+ ntasks_per_node
+ / bg_conf->cpu_ratio;
+ /* On Q systems you can have 2
+ processes per thread */
+ if (!divisor || divisor > 2) {
+ error("Asking for more "
+ "resources than "
+ "possible. Denied.");
+ return SLURM_ERROR;
+ } else
+ tmp /= divisor;
+#else
+ error("Asking for more resources than "
+ "possible. Requested %u nodes "
+ "and %u "
+ "tasks, giving them %u nodes.",
+ job_desc->min_nodes,
+ job_desc->min_cpus, tmp);
+#endif
+ }
+ job_desc->min_nodes = tmp;
+ }
+ }
/* initialize min_cpus to the min_nodes */
job_desc->min_cpus = job_desc->min_nodes * bg_conf->cpu_ratio;
@@ -2635,19 +3248,24 @@
job_desc->min_cpus = bg_conf->cpus_per_mp/tmp;
job_desc->min_nodes = 1;
#else
- i = bg_conf->smallest_block;
- while (i <= bg_conf->mp_cnode_cnt) {
- if (job_desc->min_nodes <= i) {
- job_desc->min_nodes = i;
- break;
+ /* If it is allowed to run sub block allocations then
+ an allocation can be any size. If it doesn't line
+ up with a geometry it will be massaged later.
+ */
+ if (!bg_conf->sub_blocks) {
+ i = bg_conf->smallest_block;
+ while (i <= bg_conf->mp_cnode_cnt) {
+ if (job_desc->min_nodes <= i) {
+ job_desc->min_nodes = i;
+ break;
+ }
+ i *= 2;
}
- i *= 2;
}
set_select_jobinfo(job_desc->select_jobinfo->data,
SELECT_JOBDATA_NODE_CNT,
&job_desc->min_nodes);
-
job_desc->min_cpus = job_desc->min_nodes
* bg_conf->cpu_ratio;
job_desc->min_nodes = 1;
@@ -2685,13 +3303,15 @@
job_desc->max_cpus = bg_conf->cpus_per_mp/tmp;
job_desc->max_nodes = 1;
#else
- i = bg_conf->smallest_block;
- while (i <= bg_conf->mp_cnode_cnt) {
- if (job_desc->max_nodes <= i) {
- job_desc->max_nodes = i;
- break;
+ if (!bg_conf->sub_blocks) {
+ i = bg_conf->smallest_block;
+ while (i <= bg_conf->mp_cnode_cnt) {
+ if (job_desc->max_nodes <= i) {
+ job_desc->max_nodes = i;
+ break;
+ }
+ i *= 2;
}
- i *= 2;
}
job_desc->max_cpus =
job_desc->max_nodes * bg_conf->cpu_ratio;
@@ -2743,6 +3363,68 @@
extern bitstr_t *select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt)
{
+#ifdef HAVE_BG
+ /* Reserve a block of appropriate geometry by issuing a fake job
+ * WILL_RUN call */
+ int i, rc;
+ uint32_t tmp_u32;
+ uint16_t conn_type[SYSTEM_DIMENSIONS];
+ uint16_t geo[SYSTEM_DIMENSIONS];
+ uint16_t reboot = 0;
+ uint16_t rotate = 1;
+ List preemptee_candidates, preemptee_job_list;
+ struct job_record job_rec;
+ bitstr_t *tmp_bitmap;
+
+ memset(&job_rec, 0, sizeof(struct job_record));
+ job_rec.details = xmalloc(sizeof(struct job_details));
+ job_rec.select_jobinfo = select_g_select_jobinfo_alloc();
+
+ tmp_u32 = 1;
+ set_select_jobinfo(job_rec.select_jobinfo->data,
+ SELECT_JOBDATA_ALTERED, &tmp_u32);
+ set_select_jobinfo(job_rec.select_jobinfo->data,
+ SELECT_JOBDATA_NODE_CNT, &node_cnt);
+ for (i = 0; i < SYSTEM_DIMENSIONS; i++) {
+ conn_type[i] = SELECT_NAV;
+ geo[i] = 0;
+ }
+ select_g_select_jobinfo_set(job_rec.select_jobinfo,
+ SELECT_JOBDATA_GEOMETRY, &geo);
+ select_g_select_jobinfo_set(job_rec.select_jobinfo,
+ SELECT_JOBDATA_CONN_TYPE, &conn_type);
+ select_g_select_jobinfo_set(job_rec.select_jobinfo,
+ SELECT_JOBDATA_REBOOT, &reboot);
+ select_g_select_jobinfo_set(job_rec.select_jobinfo,
+ SELECT_JOBDATA_ROTATE, &rotate);
+
+ job_rec.details->min_cpus = node_cnt * bg_conf->cpus_per_mp;
+ job_rec.details->max_cpus = job_rec.details->min_cpus;
+ tmp_bitmap = bit_copy(avail_bitmap);
+
+ preemptee_candidates = list_create(NULL);
+ if (preemptee_candidates == NULL)
+ fatal("list_create: malloc failure");
+
+ rc = submit_job(&job_rec, tmp_bitmap, node_cnt, node_cnt, node_cnt,
+ SELECT_MODE_WILL_RUN, preemptee_candidates,
+ &preemptee_job_list);
+
+ list_destroy(preemptee_candidates);
+ xfree(job_rec.details);
+ select_g_select_jobinfo_free(job_rec.select_jobinfo);
+
+ if (rc == SLURM_SUCCESS) {
+ char *resv_nodes = bitmap2node_name(tmp_bitmap);
+ info("Reservation request for %u nodes satisfied with %s",
+ node_cnt, resv_nodes);
+ xfree(resv_nodes);
+ return tmp_bitmap;
+ } else {
+ info("Reservation request for %u nodes failed", node_cnt);
+ FREE_NULL_BITMAP(tmp_bitmap);
+ }
+#endif
return NULL;
}
diff --git a/src/plugins/select/bluegene/sfree/Makefile.in b/src/plugins/select/bluegene/sfree/Makefile.in
index 2943c8f..b3588ee 100644
--- a/src/plugins/select/bluegene/sfree/Makefile.in
+++ b/src/plugins/select/bluegene/sfree/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -190,6 +190,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -226,6 +227,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -396,7 +398,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sfree$(EXEEXT): $(sfree_OBJECTS) $(sfree_DEPENDENCIES)
+sfree$(EXEEXT): $(sfree_OBJECTS) $(sfree_DEPENDENCIES) $(EXTRA_sfree_DEPENDENCIES)
@rm -f sfree$(EXEEXT)
$(sfree_LINK) $(sfree_OBJECTS) $(sfree_LDADD) $(LIBS)
@@ -535,10 +537,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/bluegene/sfree/sfree.c b/src/plugins/select/bluegene/sfree/sfree.c
index fe16af4..528eeb6 100644
--- a/src/plugins/select/bluegene/sfree/sfree.c
+++ b/src/plugins/select/bluegene/sfree/sfree.c
@@ -53,7 +53,7 @@
static int _get_new_info_block(block_info_msg_t **block_ptr)
{
int error_code = SLURM_NO_CHANGE_IN_DATA;
-#ifdef HAVE_BG_L_P
+#ifdef HAVE_BG
static block_info_msg_t *bg_info_ptr = NULL;
static block_info_msg_t *new_bg_ptr = NULL;
@@ -141,6 +141,10 @@
int i=0;
block_info_msg_t *block_ptr = NULL;
_get_new_info_block(&block_ptr);
+ if (!block_ptr) {
+ error("there was no block_ptr filled in.");
+ return 1;
+ }
if (block_list)
list_flush(block_list);
else
@@ -168,6 +172,7 @@
list_iterator_destroy(itr);
if (wait_full)
_check_status();
+
list_destroy(block_list);
info("done");
return 0;
diff --git a/src/plugins/select/bluegene/slurm_epilog.c b/src/plugins/select/bluegene/slurm_epilog.c
index e297ca4..15b41a9 100644
--- a/src/plugins/select/bluegene/slurm_epilog.c
+++ b/src/plugins/select/bluegene/slurm_epilog.c
@@ -54,24 +54,14 @@
#include "slurm/slurm.h"
-#include "src/common/hostlist.h"
-
#define _DEBUG 0
/*
* Check the bgblock's status every POLL_SLEEP seconds.
- * Retry for a period of MIN_DELAY + (INCR_DELAY * base partition count).
- * For example if MIN_DELAY=300 and INCR_DELAY=20, wait up to 428 seconds
- * for a 16 base partition bgblock to ready (300 + 20 * 16).
+ * Retry until the job is removed
*/
#define POLL_SLEEP 3 /* retry interval in seconds */
-#define MIN_DELAY 300 /* time in seconds */
-#define INCR_DELAY 20 /* time in seconds per BP */
-int max_delay = MIN_DELAY;
-int cur_delay = 0;
-
-static int _get_job_size(uint32_t job_id);
static void _wait_part_not_ready(uint32_t job_id);
int main(int argc, char *argv[])
@@ -97,23 +87,18 @@
static void _wait_part_not_ready(uint32_t job_id)
{
- int is_ready = 1, i, rc;
-
- max_delay = MIN_DELAY + (INCR_DELAY * _get_job_size(job_id));
+ int is_ready = 1, rc;
#if _DEBUG
printf("Waiting for job %u to be not ready.", job_id);
#endif
- for (i=0; (cur_delay < max_delay); i++) {
- if (i) {
- sleep(POLL_SLEEP);
- cur_delay += POLL_SLEEP;
-#if _DEBUG
- printf(".");
-#endif
- }
-
+ /* It has been decided waiting forever is a better solution
+ than ending early and saying we are done when in reality
+ the job is still running. So now we trust the slurmctld to
+ tell us when we are done and never end until that happens.
+ */
+ while (1) {
rc = slurm_job_node_ready(job_id);
if (rc == READY_JOB_FATAL)
break; /* fatal error */
@@ -123,6 +108,10 @@
is_ready = 0;
break;
}
+ sleep(POLL_SLEEP);
+#if _DEBUG
+ printf(".");
+#endif
}
#if _DEBUG
@@ -135,34 +124,3 @@
fprintf(stderr, "Job %u is still ready.\n", job_id);
}
-
-static int _get_job_size(uint32_t job_id)
-{
- job_info_msg_t *job_buffer_ptr;
- job_info_t * job_ptr;
- int i, size = 1;
- hostlist_t hl;
-
- if (slurm_load_jobs((time_t) 0, &job_buffer_ptr, SHOW_ALL)) {
- slurm_perror("slurm_load_jobs");
- return 1;
- }
-
- for (i = 0; i < job_buffer_ptr->record_count; i++) {
- job_ptr = &job_buffer_ptr->job_array[i];
- if (job_ptr->job_id != job_id)
- continue;
- hl = hostlist_create(job_ptr->nodes);
- if (hl) {
- size = hostlist_count(hl);
- hostlist_destroy(hl);
- }
- break;
- }
- slurm_free_job_info_msg (job_buffer_ptr);
-
-#if _DEBUG
- printf("Size is %d\n", size);
-#endif
- return size;
-}
diff --git a/src/plugins/select/cons_res/Makefile.in b/src/plugins/select/cons_res/Makefile.in
index 2829b96..1ce8622 100644
--- a/src/plugins/select/cons_res/Makefile.in
+++ b/src/plugins/select/cons_res/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
select_cons_res_la_LIBADD =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -408,7 +416,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-select_cons_res.la: $(select_cons_res_la_OBJECTS) $(select_cons_res_la_DEPENDENCIES)
+select_cons_res.la: $(select_cons_res_la_OBJECTS) $(select_cons_res_la_DEPENDENCIES) $(EXTRA_select_cons_res_la_DEPENDENCIES)
$(select_cons_res_la_LINK) -rpath $(pkglibdir) $(select_cons_res_la_OBJECTS) $(select_cons_res_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -547,10 +555,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c
index f8b23ad..3feafe8 100644
--- a/src/plugins/select/cons_res/dist_tasks.c
+++ b/src/plugins/select/cons_res/dist_tasks.c
@@ -402,7 +402,7 @@
* The CPU array contains the distribution of CPUs, which can include
* virtual CPUs (hyperthreads)
*/
-static void _cyclic_sync_core_bitmap(struct job_record *job_ptr,
+static int _cyclic_sync_core_bitmap(struct job_record *job_ptr,
const uint16_t cr_type)
{
uint32_t c, i, j, s, n, *sock_start, *sock_end, size, csize, core_cnt;
@@ -411,9 +411,10 @@
bitstr_t *core_map;
bool *sock_used, alloc_cores = false, alloc_sockets = false;
uint16_t ntasks_per_core = 0xffff;
+ int error_code = SLURM_SUCCESS;
if ((job_res == NULL) || (job_res->core_bitmap == NULL))
- return;
+ return error_code;
if (cr_type & CR_CORE)
alloc_cores = true;
@@ -499,7 +500,12 @@
}
if (prev_cpus == cpus) {
/* we're stuck! */
- fatal("cons_res: sync loop not progressing");
+ job_ptr->priority = 0;
+ job_ptr->state_reason = WAIT_HELD;
+ error("cons_res: sync loop not progressing, "
+ "holding job %u", job_ptr->job_id);
+ error_code = SLURM_ERROR;
+ goto fini;
}
}
/* clear the rest of the cores in each socket
@@ -528,9 +534,10 @@
/* advance 'c' to the beginning of the next node */
c += sockets * cps;
}
- xfree(sock_start);
+fini: xfree(sock_start);
xfree(sock_end);
xfree(sock_used);
+ return error_code;
}
@@ -636,11 +643,11 @@
case SLURM_DIST_BLOCK_CYCLIC:
case SLURM_DIST_CYCLIC_CYCLIC:
case SLURM_DIST_UNKNOWN:
- _cyclic_sync_core_bitmap(job_ptr, cr_type);
+ error_code = _cyclic_sync_core_bitmap(job_ptr, cr_type);
break;
default:
error("select/cons_res: invalid task_dist entry");
return SLURM_ERROR;
}
- return SLURM_SUCCESS;
+ return error_code;
}
diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c
index e00a483..1e3d51f 100644
--- a/src/plugins/select/cons_res/job_test.c
+++ b/src/plugins/select/cons_res/job_test.c
@@ -294,7 +294,8 @@
} else {
j = avail_cpus / cpus_per_task;
num_tasks = MIN(num_tasks, j);
- avail_cpus = num_tasks * cpus_per_task;
+ if (job_ptr->details->ntasks_per_node)
+ avail_cpus = num_tasks * cpus_per_task;
}
if ((job_ptr->details->ntasks_per_node &&
(num_tasks < job_ptr->details->ntasks_per_node)) ||
@@ -382,7 +383,7 @@
uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map,
const uint32_t node_i, bool cpu_type)
{
- uint16_t cpu_count = 0, avail_cpus = 0, num_tasks = 0;
+ uint16_t avail_cpus = 0, num_tasks = 0;
uint32_t core_begin = cr_get_coremap_offset(node_i);
uint32_t core_end = cr_get_coremap_offset(node_i+1);
uint32_t c;
@@ -521,8 +522,6 @@
} else {
j = avail_cpus / cpus_per_task;
num_tasks = MIN(num_tasks, j);
- if (job_ptr->details->ntasks_per_node)
- avail_cpus = num_tasks * cpus_per_task;
}
if ((job_ptr->details->ntasks_per_node &&
@@ -544,21 +543,10 @@
bit_clear(core_map, c);
else {
free_cores[i]--;
- /* we have to ensure that cpu_count
- * is not bigger than avail_cpus due to
- * hyperthreading or this would break
- * the selection logic providing more
- * cpus than allowed after task-related data
- * processing of stage 3
- */
- if (avail_cpus >= threads_per_core) {
+ if (avail_cpus >= threads_per_core)
avail_cpus -= threads_per_core;
- cpu_count += threads_per_core;
- }
- else {
- cpu_count += avail_cpus;
+ else
avail_cpus = 0;
- }
}
}
@@ -569,10 +557,9 @@
fini:
if (!num_tasks) {
bit_nclear(core_map, core_begin, core_end-1);
- cpu_count = 0;
}
xfree(free_cores);
- return cpu_count;
+ return num_tasks * cpus_per_task;
}
@@ -602,7 +589,7 @@
{
uint16_t cpus;
uint32_t avail_mem, req_mem, gres_cpus;
- int core_start_bit, core_end_bit;
+ int core_start_bit, core_end_bit, cpu_alloc_size;
struct node_record *node_ptr = node_record_table_ptr + node_i;
List gres_list;
@@ -612,13 +599,19 @@
return cpus;
}
- if (cr_type & CR_CORE)
+ if (cr_type & CR_CORE) {
cpus = _allocate_cores(job_ptr, core_map, node_i, false);
- else if (cr_type & CR_SOCKET)
+ /* cpu_alloc_size = CPUs per core */
+ cpu_alloc_size = select_node_record[node_i].vpus;
+ } else if (cr_type & CR_SOCKET) {
cpus = _allocate_sockets(job_ptr, core_map, node_i);
- else
+ /* cpu_alloc_size = CPUs per socket */
+ cpu_alloc_size = select_node_record[node_i].cores *
+ select_node_record[node_i].vpus;
+ } else {
cpus = _allocate_cores(job_ptr, core_map, node_i, true);
-
+ cpu_alloc_size = 1;
+ }
core_start_bit = cr_get_coremap_offset(node_i);
core_end_bit = cr_get_coremap_offset(node_i+1) - 1;
node_ptr = select_node_record[node_i].node_ptr;
@@ -635,7 +628,7 @@
if (job_ptr->details->pn_min_memory & MEM_PER_CPU) {
/* memory is per-cpu */
while ((cpus > 0) && ((req_mem * cpus) > avail_mem))
- cpus--;
+ cpus -= cpu_alloc_size;
if ((cpus < job_ptr->details->ntasks_per_node) ||
((job_ptr->details->cpus_per_task > 1) &&
(cpus < job_ptr->details->cpus_per_task)))
@@ -661,8 +654,8 @@
((job_ptr->details->cpus_per_task > 1) &&
(gres_cpus < job_ptr->details->cpus_per_task)))
gres_cpus = 0;
- if (gres_cpus < cpus)
- cpus = gres_cpus;
+ while (gres_cpus < cpus)
+ cpus -= cpu_alloc_size;
if (cpus == 0)
bit_nclear(core_map, core_start_bit, core_end_bit);
@@ -759,8 +752,12 @@
/* node-level memory check */
if ((job_ptr->details->pn_min_memory) &&
(cr_type & CR_MEMORY)) {
- free_mem = select_node_record[i].real_memory;
- free_mem -= node_usage[i].alloc_memory;
+ if (select_node_record[i].real_memory >
+ node_usage[i].alloc_memory)
+ free_mem = select_node_record[i].real_memory -
+ node_usage[i].alloc_memory;
+ else
+ free_mem = 0;
if (free_mem < min_mem) {
debug3("cons_res: _vns: node %s no mem %u < %u",
select_node_record[i].node_ptr->name,
@@ -969,7 +966,8 @@
int *consec_req; /* are nodes from this set required
* (in req_bitmap) */
int consec_index, consec_size, sufficient;
- int rem_cpus, rem_nodes; /* remaining resources desired */
+ int rem_cpus, rem_nodes; /* remaining resources desired */
+ int min_rem_nodes; /* remaining resources desired */
int total_cpus = 0; /* #CPUs allocated to job */
int best_fit_nodes, best_fit_cpus, best_fit_req;
int best_fit_sufficient, best_fit_index = 0;
@@ -1013,6 +1011,7 @@
rem_cpus = details_ptr->min_cpus;
rem_nodes = MAX(min_nodes, req_nodes);
+ min_rem_nodes = min_nodes;
for (i = 0, ll = -1; i < cr_node_cnt; i++) {
if (req_map)
@@ -1038,6 +1037,7 @@
total_cpus += avail_cpus;
rem_cpus -= avail_cpus;
rem_nodes--;
+ min_rem_nodes--;
/* leaving bitmap set, decrement max limit */
max_nodes--;
} else { /* node not selected (yet) */
@@ -1165,7 +1165,8 @@
* requested nodes here we will still give
* them and then the step layout will sort
* things out. */
- _cpus_to_use(&avail_cpus, rem_cpus, rem_nodes,
+ _cpus_to_use(&avail_cpus, rem_cpus,
+ min_rem_nodes,
details_ptr, &cpu_cnt[i]);
total_cpus += avail_cpus;
/* enforce the max_cpus limit */
@@ -1179,6 +1180,7 @@
}
bit_set(node_map, i);
rem_nodes--;
+ min_rem_nodes--;
max_nodes--;
rem_cpus -= avail_cpus;
}
@@ -1197,7 +1199,8 @@
* requested nodes here we will still give
* them and then the step layout will sort
* things out. */
- _cpus_to_use(&avail_cpus, rem_cpus, rem_nodes,
+ _cpus_to_use(&avail_cpus, rem_cpus,
+ min_rem_nodes,
details_ptr, &cpu_cnt[i]);
total_cpus += avail_cpus;
/* enforce the max_cpus limit */
@@ -1212,6 +1215,7 @@
rem_cpus -= avail_cpus;
bit_set(node_map, i);
rem_nodes--;
+ min_rem_nodes--;
max_nodes--;
}
} else {
@@ -1275,7 +1279,8 @@
* requested nodes here we will still give
* them and then the step layout will sort
* things out. */
- _cpus_to_use(&avail_cpus, rem_cpus, rem_nodes,
+ _cpus_to_use(&avail_cpus, rem_cpus,
+ min_rem_nodes,
details_ptr, &cpu_cnt[i]);
total_cpus += avail_cpus;
/* enforce the max_cpus limit */
@@ -1290,6 +1295,7 @@
rem_cpus -= avail_cpus;
bit_set(node_map, i);
rem_nodes--;
+ min_rem_nodes--;
max_nodes--;
}
xfree(cpus_array);
@@ -1335,6 +1341,7 @@
bitstr_t *avail_nodes_bitmap = NULL; /* nodes on any switch */
bitstr_t *req_nodes_bitmap = NULL;
int rem_cpus, rem_nodes; /* remaining resources desired */
+ int min_rem_nodes; /* remaining resources desired */
int avail_cpus;
int total_cpus = 0; /* #CPUs allocated to job */
int i, j, rc = SLURM_SUCCESS;
@@ -1353,10 +1360,8 @@
}
rem_cpus = job_ptr->details->min_cpus;
- if (req_nodes > min_nodes)
- rem_nodes = req_nodes;
- else
- rem_nodes = min_nodes;
+ rem_nodes = MAX(min_nodes, req_nodes);
+ min_rem_nodes = min_nodes;
if (job_ptr->details->req_node_bitmap) {
req_nodes_bitmap = bit_copy(job_ptr->details->req_node_bitmap);
@@ -1451,9 +1456,10 @@
* requested nodes here we will still give
* them and then the step layout will sort
* things out. */
- _cpus_to_use(&avail_cpus, rem_cpus, rem_nodes,
+ _cpus_to_use(&avail_cpus, rem_cpus, min_rem_nodes,
job_ptr->details, &cpu_cnt[i]);
rem_nodes--;
+ min_rem_nodes--;
max_nodes--;
total_cpus += avail_cpus;
rem_cpus -= avail_cpus;
@@ -1724,7 +1730,7 @@
* requested nodes here we will still give
* them and then the step layout will sort
* things out. */
- _cpus_to_use(&bfsize, rem_cpus, rem_nodes,
+ _cpus_to_use(&bfsize, rem_cpus, min_rem_nodes,
job_ptr->details, &cpu_cnt[bfloc]);
/* enforce the max_cpus limit */
@@ -1739,6 +1745,7 @@
bit_set(bitmap, bfloc);
total_cpus += bfsize;
rem_nodes--;
+ min_rem_nodes--;
max_nodes--;
rem_cpus -= bfsize;
}
diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index 7534a92..b9a7539 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -833,6 +833,7 @@
struct part_res_record *p_ptr;
List gres_list;
int i, n;
+ bitstr_t *core_bitmap;
if (!job || !job->core_bitmap) {
error("job %u has no select data", job_ptr->job_id);
@@ -856,10 +857,13 @@
gres_list = select_node_usage[i].gres_list;
else
gres_list = node_ptr->gres_list;
+ core_bitmap = copy_job_resources_node(job, n);
gres_plugin_job_alloc(job_ptr->gres_list, gres_list,
job->nhosts, n, job->cpus[n],
- job_ptr->job_id, node_ptr->name);
+ job_ptr->job_id, node_ptr->name,
+ core_bitmap);
gres_plugin_node_state_log(gres_list, node_ptr->name);
+ FREE_NULL_BITMAP(core_bitmap);
}
if (action != 2) {
@@ -2320,6 +2324,7 @@
{
int rc = SLURM_SUCCESS;
uint16_t *uint16 = (uint16_t *) data;
+ char **tmp_char = (char **) data;
select_nodeinfo_t **select_nodeinfo = (select_nodeinfo_t **) data;
if (nodeinfo == NULL) {
@@ -2345,6 +2350,10 @@
case SELECT_NODEDATA_PTR:
*select_nodeinfo = nodeinfo;
break;
+ case SELECT_NODEDATA_RACK_MP:
+ case SELECT_NODEDATA_EXTRA_INFO:
+ *tmp_char = NULL;
+ break;
default:
error("Unsupported option %d for get_nodeinfo.", dinfo);
rc = SLURM_ERROR;
@@ -2422,6 +2431,11 @@
return SLURM_SUCCESS;
}
+extern int select_p_fail_cnode(struct step_record *step_ptr)
+{
+ return SLURM_SUCCESS;
+}
+
extern int select_p_get_info_from_plugin(enum select_plugindata_info info,
struct job_record *job_ptr,
void *data)
diff --git a/src/plugins/select/cray/Makefile.am b/src/plugins/select/cray/Makefile.am
index f6c638d..2ec30a3 100644
--- a/src/plugins/select/cray/Makefile.am
+++ b/src/plugins/select/cray/Makefile.am
@@ -18,7 +18,8 @@
other_select.c \
other_select.h \
parser_common.h \
- cray_config.c
+ cray_config.c \
+ cray_config.h
select_cray_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
if HAVE_CRAY
diff --git a/src/plugins/select/cray/Makefile.in b/src/plugins/select/cray/Makefile.in
index edd3aa4..522ed9e 100644
--- a/src/plugins/select/cray/Makefile.in
+++ b/src/plugins/select/cray/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -106,6 +106,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
@HAVE_CRAY_EMULATION_FALSE@@HAVE_CRAY_TRUE@select_cray_la_DEPENDENCIES = \
@@ -113,7 +119,7 @@
@HAVE_CRAY_EMULATION_TRUE@@HAVE_CRAY_TRUE@select_cray_la_DEPENDENCIES = libemulate/libalps.la
am__select_cray_la_SOURCES_DIST = select_cray.c basil_interface.h \
nodespec.c other_select.c other_select.h parser_common.h \
- cray_config.c basil_interface.c
+ cray_config.c cray_config.h basil_interface.c
@HAVE_CRAY_TRUE@am__objects_1 = select_cray_la-basil_interface.lo
am_select_cray_la_OBJECTS = select_cray_la-select_cray.lo \
select_cray_la-nodespec.lo select_cray_la-other_select.lo \
@@ -255,6 +261,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -291,6 +298,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -387,7 +395,7 @@
# Cray node selection plugin.
select_cray_la_SOURCES = select_cray.c basil_interface.h nodespec.c \
other_select.c other_select.h parser_common.h cray_config.c \
- $(am__append_1)
+ cray_config.h $(am__append_1)
select_cray_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
@HAVE_CRAY_TRUE@select_cray_la_CFLAGS = $(MYSQL_CFLAGS)
@HAVE_CRAY_EMULATION_FALSE@@HAVE_CRAY_TRUE@SUBDIRS = libalps
@@ -459,7 +467,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-select_cray.la: $(select_cray_la_OBJECTS) $(select_cray_la_DEPENDENCIES)
+select_cray.la: $(select_cray_la_OBJECTS) $(select_cray_la_DEPENDENCIES) $(EXTRA_select_cray_la_DEPENDENCIES)
$(select_cray_la_LINK) -rpath $(pkglibdir) $(select_cray_la_OBJECTS) $(select_cray_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -747,10 +755,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/cray/basil_alps.h b/src/plugins/select/cray/basil_alps.h
index 4010480..8e61e61 100644
--- a/src/plugins/select/cray/basil_alps.h
+++ b/src/plugins/select/cray/basil_alps.h
@@ -57,6 +57,7 @@
BV_4_0, /* Basil 1.2 CLE 4.x unconfirmed simulator version */
BV_4_1, /* Basil 1.2 CLE 4.x unconfirmed simulator version */
BV_5_0, /* Basil 1.2 CLE 5.x unconfirmed simulator version */
+ BV_5_1, /* Basil 1.2 CLE 5.x unconfirmed simulator version */
BV_MAX
};
@@ -544,7 +545,7 @@
struct nodespec *next;
};
-extern int ns_add_node(struct nodespec **head, uint32_t node_id);
+extern int ns_add_node(struct nodespec **head, uint32_t node_id, bool sorted);
extern char *ns_to_string(const struct nodespec *head);
extern void free_nodespec(struct nodespec *head);
diff --git a/src/plugins/select/cray/basil_interface.c b/src/plugins/select/cray/basil_interface.c
index a9ebc8c..5a52a63 100644
--- a/src/plugins/select/cray/basil_interface.c
+++ b/src/plugins/select/cray/basil_interface.c
@@ -6,12 +6,18 @@
*/
#include "basil_interface.h"
#include "basil_alps.h"
+#include "src/common/gres.h"
#include "src/common/slurm_accounting_storage.h"
#define _DEBUG 0
int dim_size[3] = {0, 0, 0};
+typedef struct args_sig_basil {
+ uint32_t resv_id;
+ int signal;
+ uint16_t delay;
+} args_sig_basil_t;
/*
* Following routines are from src/plugins/select/bluegene/plugin/jobinfo.c
@@ -684,6 +690,36 @@
return SLURM_SUCCESS;
}
+struct basil_accel_param* build_accel_param(struct job_record* job_ptr)
+{
+ int gpu_mem_req;
+ struct basil_accel_param* head,* bap_ptr;
+
+ gpu_mem_req = gres_plugin_get_job_value_by_type(job_ptr->gres_list,
+ "gpu_mem");
+
+ if (gpu_mem_req == NO_VAL)
+ gpu_mem_req = 0;
+
+ if (!job_ptr) {
+ info("The job_ptr is NULL; nothing to do!");
+ return NULL;
+ } else if (!job_ptr->details) {
+ info("The job_ptr->details is NULL; nothing to do!");
+ return NULL;
+ }
+
+ head = xmalloc(sizeof(struct basil_accel_param));
+ bap_ptr = head;
+ bap_ptr->type = BA_GPU; /* Currently BASIL only permits
+ * generic resources of type GPU. */
+ bap_ptr->memory_mb = gpu_mem_req;
+ bap_ptr->next = NULL;
+
+ return head;
+}
+
+
/**
* do_basil_reserve - create a BASIL reservation.
* IN job_ptr - pointer to job which has just been allocated resources
@@ -701,6 +737,7 @@
int i, first_bit, last_bit;
long rc;
char *user, batch_id[16];
+ struct basil_accel_param* bap;
if (!job_ptr->job_resrcs || job_ptr->job_resrcs->nhosts == 0)
return SLURM_SUCCESS;
@@ -747,7 +784,7 @@
fatal("can not read basil_node_id from %s",
node_ptr->name);
- if (ns_add_node(&ns_head, basil_node_id) != 0) {
+ if (ns_add_node(&ns_head, basil_node_id, false) != 0) {
error("can not add node %s (nid%05u)", node_ptr->name,
basil_node_id);
free_nodespec(ns_head);
@@ -803,8 +840,14 @@
snprintf(batch_id, sizeof(batch_id), "%u", job_ptr->job_id);
user = uid_to_string(job_ptr->user_id);
+
+ if (job_ptr->gres_list)
+ bap = build_accel_param(job_ptr);
+ else
+ bap = NULL;
+
rc = basil_reserve(user, batch_id, mppwidth, mppdepth, mppnppn,
- mppmem, ns_head, NULL);
+ mppmem, ns_head, bap);
xfree(user);
if (rc <= 0) {
/* errno value will be resolved by select_g_job_begin() */
@@ -915,6 +958,73 @@
return SLURM_SUCCESS;
}
+void *_sig_basil(void *args)
+{
+ args_sig_basil_t *args_sig_basil = (args_sig_basil_t *) args;
+ int rc;
+
+ sleep(args_sig_basil->delay);
+ rc = basil_signal_apids(args_sig_basil->resv_id,
+ args_sig_basil->signal, NULL);
+ if (rc) {
+ error("could not signal APIDs of resId %u: %s",
+ args_sig_basil->resv_id, basil_strerror(rc));
+ }
+ xfree(args);
+ return NULL;
+}
+
+/**
+ * queue_basil_signal - queue job signal on to any APIDs
+ * IN job_ptr - job to be signalled
+ * IN signal - signal(7) number
+ * IN delay - how long to delay the signal, in seconds
+ * Only signal job if an ALPS reservation exists (non-0 reservation ID).
+ */
+extern void queue_basil_signal(struct job_record *job_ptr, int signal,
+ uint16_t delay)
+{
+ args_sig_basil_t *args_sig_basil;
+ pthread_attr_t attr_sig_basil;
+ pthread_t thread_sig_basil;
+ uint32_t resv_id;
+
+ if (_get_select_jobinfo(job_ptr->select_jobinfo->data,
+ SELECT_JOBDATA_RESV_ID, &resv_id) != SLURM_SUCCESS) {
+ error("can not read resId for JobId=%u", job_ptr->job_id);
+ return;
+ }
+ if (resv_id == 0)
+ return;
+ if ((delay == 0) || (delay == (uint16_t) NO_VAL)) {
+ /* Send the signal now */
+ int rc = basil_signal_apids(resv_id, signal, NULL);
+
+ if (rc)
+ error("could not signal APIDs of resId %u: %s", resv_id,
+ basil_strerror(rc));
+ return;
+ }
+
+ /* Create a thread to send the signal later */
+ slurm_attr_init(&attr_sig_basil);
+ if (pthread_attr_setdetachstate(&attr_sig_basil,
+ PTHREAD_CREATE_DETACHED)) {
+ error("pthread_attr_setdetachstate error %m");
+ return;
+ }
+ args_sig_basil = xmalloc(sizeof(args_sig_basil_t));
+ args_sig_basil->resv_id = resv_id;
+ args_sig_basil->signal = signal;
+ args_sig_basil->delay = delay;
+ if (pthread_create(&thread_sig_basil, &attr_sig_basil,
+ _sig_basil, (void *) args_sig_basil)) {
+ error("pthread_create error %m");
+ return;
+ }
+ slurm_attr_destroy(&attr_sig_basil);
+}
+
/**
* do_basil_release - release an (unconfirmed) BASIL reservation
* IN job_ptr - pointer to job which has just been deallocated resources
diff --git a/src/plugins/select/cray/basil_interface.h b/src/plugins/select/cray/basil_interface.h
index f4df5f9..31ed5bc 100644
--- a/src/plugins/select/cray/basil_interface.h
+++ b/src/plugins/select/cray/basil_interface.h
@@ -60,6 +60,8 @@
extern int do_basil_signal(struct job_record *job_ptr, int signal);
extern int do_basil_release(struct job_record *job_ptr);
extern int do_basil_switch(struct job_record *job_ptr, bool suspend);
+extern void queue_basil_signal(struct job_record *job_ptr, int signal,
+ uint16_t delay);
#else /* !HAVE_CRAY */
static inline int basil_node_ranking(struct node_record *ig, int nore)
{
@@ -91,6 +93,12 @@
return SLURM_SUCCESS;
}
+static inline void queue_basil_signal(struct job_record *job_ptr, int signal,
+ uint16_t delay)
+{
+ return;
+}
+
static inline int do_basil_release(struct job_record *job_ptr)
{
return SLURM_SUCCESS;
diff --git a/src/plugins/select/cray/libalps/Makefile.am b/src/plugins/select/cray/libalps/Makefile.am
index 1f73a51..7092e4a 100644
--- a/src/plugins/select/cray/libalps/Makefile.am
+++ b/src/plugins/select/cray/libalps/Makefile.am
@@ -22,6 +22,7 @@
do_confirm.c \
do_switch.c \
memory_handling.c \
+ memory_handling.h \
popen2.c \
atoul.c
libalps_la_CFLAGS = $(MYSQL_CFLAGS)
diff --git a/src/plugins/select/cray/libalps/Makefile.in b/src/plugins/select/cray/libalps/Makefile.in
index c49d555..b713e61 100644
--- a/src/plugins/select/cray/libalps/Makefile.in
+++ b/src/plugins/select/cray/libalps/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -194,6 +194,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -230,6 +231,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -336,6 +338,7 @@
do_confirm.c \
do_switch.c \
memory_handling.c \
+ memory_handling.h \
popen2.c \
atoul.c
@@ -385,7 +388,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libalps.la: $(libalps_la_OBJECTS) $(libalps_la_DEPENDENCIES)
+libalps.la: $(libalps_la_OBJECTS) $(libalps_la_DEPENDENCIES) $(EXTRA_libalps_la_DEPENDENCIES)
$(libalps_la_LINK) $(libalps_la_OBJECTS) $(libalps_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -638,10 +641,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/cray/libalps/basil_request.c b/src/plugins/select/cray/libalps/basil_request.c
index 13c238d..97ec0db 100644
--- a/src/plugins/select/cray/libalps/basil_request.c
+++ b/src/plugins/select/cray/libalps/basil_request.c
@@ -6,56 +6,122 @@
* Licensed under the GPLv2.
*/
#include "parser_internal.h"
+#include <stdarg.h>
+int log_sel = -1;
+char *xml_log_loc = NULL;
+char xml_log_file_name[256] = "";
+
+/*
+ * Function: _write_xml
+ * Purpose: Intercepts SLURM's ALPS BASIL XML requests so that it can
+ *		log it as well as pass it to ALPS BASIL.
+ * Use:		Logging is controlled by environment variables:
+ * 0) XML_LOG set to enable logging
+ * 1) XML_LOG_LOC not set => Log to generic "slurm_basil_xml.log" file
+ * 2) XML_LOG_LOC="SLURM" => Log to the common slurmctld.log file
+ * 3) XML_LOG_LOC=<path> => Log to file specified by the path here
+ *
+ * Note: Any change in these environment variables requires a restart of slurmctld
+ * to take effect.
+ */
+static int _write_xml(FILE* fp, const char* format, ...) {
+ char buff[1024];
+ va_list ap;
+ int rc;
+ FILE* fplog = NULL;
+
+ /* Write to ALPS BASIL itself as we would have done without logging. */
+ va_start(ap, format);
+ vsnprintf(buff, sizeof(buff), format, ap);
+ va_end(ap);
+ rc = fprintf(fp, "%s", buff);
+ if (log_sel < 1)
+ return rc;
+
+ /* Perform the appropriate logging. */
+ if (xml_log_file_name[0] != '\0') {
+ /* If we have a specific file name, try to open it. */
+ fplog = fopen(xml_log_file_name, "a+");
+ if (fplog == NULL) {
+			error("Problem with fopen() of %s: %m",
+ xml_log_file_name);
+ }
+ }
+ if (fplog) {
+ fprintf(fplog, "%s", buff);
+ fclose(fplog);
+ } else
+ info("%s", buff);
+
+ return rc;
+}
+
+static void _init_log_config(void)
+{
+ if (getenv("XML_LOG"))
+ log_sel = 1;
+ else
+ log_sel = 0;
+ xml_log_loc = getenv("XML_LOG_LOC");
+ if (xml_log_loc && strcmp(xml_log_loc, "SLURM") &&
+ (strlen(xml_log_loc) < sizeof(xml_log_file_name))) {
+ strcpy(xml_log_file_name, xml_log_loc);
+ } else {
+ sprintf(xml_log_file_name, "slurm_basil_xml.log");
+ }
+}
static void _rsvn_write_reserve_xml(FILE *fp, struct basil_reservation *r)
{
struct basil_rsvn_param *param;
- fprintf(fp, " <ReserveParamArray user_name=\"%s\"", r->user_name);
+ _write_xml(fp, " <ReserveParamArray user_name=\"%s\"", r->user_name);
if (*r->batch_id != '\0')
- fprintf(fp, " batch_id=\"%s\"", r->batch_id);
+ _write_xml(fp, " batch_id=\"%s\"", r->batch_id);
if (*r->account_name != '\0')
- fprintf(fp, " account_name=\"%s\"", r->account_name);
- fprintf(fp, ">\n");
+ _write_xml(fp, " account_name=\"%s\"", r->account_name);
+ _write_xml(fp, ">\n");
for (param = r->params; param; param = param->next) {
- fprintf(fp, " <ReserveParam architecture=\"%s\" "
- "width=\"%ld\" depth=\"%ld\" nppn=\"%ld\"",
- nam_arch[param->arch],
- param->width, param->depth, param->nppn);
+ _write_xml(fp, " <ReserveParam architecture=\"%s\" "
+ "width=\"%ld\" depth=\"%ld\" nppn=\"%ld\"",
+ nam_arch[param->arch],
+ param->width, param->depth, param->nppn);
if (param->memory || param->labels ||
param->nodes || param->accel) {
- fprintf(fp, ">\n");
+ _write_xml(fp, ">\n");
} else {
- fprintf(fp, "/>\n");
+ _write_xml(fp, "/>\n");
continue;
}
if (param->memory) {
struct basil_memory_param *mem;
- fprintf(fp, " <MemoryParamArray>\n");
- for (mem = param->memory; mem; mem = mem->next)
- fprintf(fp, " <MemoryParam type=\"%s\""
- " size_mb=\"%u\"/>\n",
- nam_memtype[mem->type],
- mem->size_mb ? : 1);
- fprintf(fp, " </MemoryParamArray>\n");
+ _write_xml(fp, " <MemoryParamArray>\n");
+ for (mem = param->memory; mem; mem = mem->next) {
+ _write_xml(fp, " <MemoryParam type=\"%s\""
+ " size_mb=\"%u\"/>\n",
+ nam_memtype[mem->type],
+ mem->size_mb ? : 1);
+ }
+ _write_xml(fp, " </MemoryParamArray>\n");
}
if (param->labels) {
struct basil_label *label;
- fprintf(fp, " <LabelParamArray>\n");
+ _write_xml(fp, " <LabelParamArray>\n");
for (label = param->labels; label; label = label->next)
- fprintf(fp, " <LabelParam name=\"%s\""
- " type=\"%s\" disposition=\"%s\"/>\n",
- label->name, nam_labeltype[label->type],
- nam_ldisp[label->disp]);
+ _write_xml(fp, " <LabelParam name=\"%s\""
+ " type=\"%s\" disposition=\"%s\"/>\n",
+ label->name,
+ nam_labeltype[label->type],
+ nam_ldisp[label->disp]);
- fprintf(fp, " </LabelParamArray>\n");
+ _write_xml(fp, " </LabelParamArray>\n");
}
if (param->nodes && *param->nodes) {
@@ -71,31 +137,31 @@
* which had the same effect as supplying it once.
* Hence the array expression is actually not needed.
*/
- fprintf(fp, " <NodeParamArray>\n"
- " <NodeParam>%s</NodeParam>\n"
- " </NodeParamArray>\n", param->nodes);
+ _write_xml(fp, " <NodeParamArray>\n"
+ " <NodeParam>%s</NodeParam>\n"
+ " </NodeParamArray>\n", param->nodes);
}
if (param->accel) {
struct basil_accel_param *accel;
- fprintf(fp, " <AccelParamArray>\n");
+ _write_xml(fp, " <AccelParamArray>\n");
for (accel = param->accel; accel; accel = accel->next) {
- fprintf(fp, " <AccelParam type=\"%s\"",
- nam_acceltype[accel->type]);
+ _write_xml(fp, " <AccelParam type=\"%s\"",
+ nam_acceltype[accel->type]);
if (accel->memory_mb)
- fprintf(fp, " memory_mb=\"%u\"",
- accel->memory_mb);
- fprintf(fp, "/>\n");
+ _write_xml(fp, " memory_mb=\"%u\"",
+ accel->memory_mb);
+ _write_xml(fp, "/>\n");
}
- fprintf(fp, " </AccelParamArray>\n");
+ _write_xml(fp, " </AccelParamArray>\n");
}
- fprintf(fp, " </ReserveParam>\n");
+ _write_xml(fp, " </ReserveParam>\n");
}
- fprintf(fp, " </ReserveParamArray>\n"
- "</BasilRequest>\n");
+ _write_xml(fp, " </ReserveParamArray>\n"
+ "</BasilRequest>\n");
}
/*
@@ -111,6 +177,9 @@
FILE *apbasil;
pid_t pid;
+ if (log_sel == -1)
+ _init_log_config();
+
if (!cray_conf->apbasil) {
error("No alps client defined");
return 0;
@@ -128,44 +197,44 @@
fatal("fdopen(): %s", strerror(errno));
setlinebuf(apbasil);
- fprintf(apbasil, "<?xml version=\"1.0\"?>\n"
- "<BasilRequest protocol=\"%s\" method=\"%s\" ",
- bv_names[bp->version], bm_names[bp->method]);
+ _write_xml(apbasil, "<?xml version=\"1.0\"?>\n"
+ "<BasilRequest protocol=\"%s\" method=\"%s\" ",
+ bv_names[bp->version], bm_names[bp->method]);
switch (bp->method) {
case BM_engine:
- fprintf(apbasil, "type=\"ENGINE\"/>");
+ _write_xml(apbasil, "type=\"ENGINE\"/>");
break;
case BM_inventory:
- fprintf(apbasil, "type=\"INVENTORY\"/>");
+ _write_xml(apbasil, "type=\"INVENTORY\"/>");
break;
case BM_reserve:
- fprintf(apbasil, ">\n");
+ _write_xml(apbasil, ">\n");
_rsvn_write_reserve_xml(apbasil, bp->mdata.res);
break;
case BM_confirm:
if (bp->version == BV_1_0 && *bp->mdata.res->batch_id != '\0')
- fprintf(apbasil, "job_name=\"%s\" ",
- bp->mdata.res->batch_id);
- fprintf(apbasil, "reservation_id=\"%u\" %s=\"%llu\"/>\n",
- bp->mdata.res->rsvn_id,
- bp->version >= BV_3_1 ? "pagg_id" : "admin_cookie",
- (unsigned long long)bp->mdata.res->pagg_id);
+ _write_xml(apbasil, "job_name=\"%s\" ",
+ bp->mdata.res->batch_id);
+ _write_xml(apbasil, "reservation_id=\"%u\" %s=\"%llu\"/>\n",
+ bp->mdata.res->rsvn_id,
+ bp->version >= BV_3_1 ? "pagg_id" : "admin_cookie",
+ (unsigned long long)bp->mdata.res->pagg_id);
break;
case BM_release:
- fprintf(apbasil, "reservation_id=\"%u\"/>\n",
- bp->mdata.res->rsvn_id);
+ _write_xml(apbasil, "reservation_id=\"%u\"/>\n",
+ bp->mdata.res->rsvn_id);
break;
case BM_switch:
{
char *suspend = bp->mdata.res->suspended ? "OUT" : "IN";
- fprintf(apbasil, ">\n");
- fprintf(apbasil, " <ReservationArray>\n");
- fprintf(apbasil, " <Reservation reservation_id=\"%u\" "
- "action=\"%s\"/>\n",
- bp->mdata.res->rsvn_id, suspend);
- fprintf(apbasil, " </ReservationArray>\n");
- fprintf(apbasil, "</BasilRequest>\n");
+ _write_xml(apbasil, ">\n");
+ _write_xml(apbasil, " <ReservationArray>\n");
+ _write_xml(apbasil, " <Reservation reservation_id=\"%u\" "
+ "action=\"%s\"/>\n",
+ bp->mdata.res->rsvn_id, suspend);
+ _write_xml(apbasil, " </ReservationArray>\n");
+ _write_xml(apbasil, "</BasilRequest>\n");
}
break;
default: /* ignore BM_none, BM_MAX, and BM_UNKNOWN covered above */
@@ -177,8 +246,10 @@
rc = parse_basil(bp, from_child);
ec = wait_for_child(pid);
- if (ec)
+ if (ec) {
error("%s child process for BASIL %s method exited with %d",
cray_conf->apbasil, bm_names[bp->method], ec);
+ }
+
return rc;
}
diff --git a/src/plugins/select/cray/libalps/do_query.c b/src/plugins/select/cray/libalps/do_query.c
index 275e9db..ff418e3 100644
--- a/src/plugins/select/cray/libalps/do_query.c
+++ b/src/plugins/select/cray/libalps/do_query.c
@@ -75,6 +75,8 @@
if (_get_alps_engine(engine_version, sizeof(engine_version)) == NULL)
fatal("can not determine ALPS Engine version");
+ else if (strncmp(engine_version, "5.1.0", 5) == 0)
+ bv = BV_5_1;
else if (strncmp(engine_version, "5.0.0", 5) == 0)
bv = BV_5_0;
else if (strncmp(engine_version, "4.1.0", 5) == 0)
diff --git a/src/plugins/select/cray/libalps/do_reserve.c b/src/plugins/select/cray/libalps/do_reserve.c
index 2ec1a25..337f092 100644
--- a/src/plugins/select/cray/libalps/do_reserve.c
+++ b/src/plugins/select/cray/libalps/do_reserve.c
@@ -81,7 +81,7 @@
* @nppn: mppnppn >= 0 (0 meaning 'use default')
* @mem_mb: mppmem >= 0 (0 meaning 'use defaults', else size in MB)
* @mppnodes: comma-separated nodelist (will be freed if not NULL)
- * @accel: accelerator parameters or NULL
+ * @accel: accelerator parameters or NULL (will be freed if not NULL)
*
* The reservation ID is initially 0, since 0 is an invalid reservation ID.
*/
@@ -96,12 +96,7 @@
assert(user != NULL && *user != '\0');
- if (width <= 0 || depth < 0 || nppn < 0)
- return NULL;
-
res = xmalloc(sizeof(*res));
- if (res == NULL)
- return NULL;
res->rsvn_id = 0;
strncpy(res->user_name, user, sizeof(res->user_name));
diff --git a/src/plugins/select/cray/libalps/parser_basil_3.1.c b/src/plugins/select/cray/libalps/parser_basil_3.1.c
index a7d9885..ec8974f 100644
--- a/src/plugins/select/cray/libalps/parser_basil_3.1.c
+++ b/src/plugins/select/cray/libalps/parser_basil_3.1.c
@@ -16,7 +16,7 @@
if (atou32(attribs[0], &node_id) < 0)
fatal("illegal node_id = %s", attribs[0]);
- if (ns_add_node(&ud->bp->mdata.res->rsvd_nodes, node_id) < 0)
+ if (ns_add_node(&ud->bp->mdata.res->rsvd_nodes, node_id, true) < 0)
fatal("could not add node %u", node_id);
}
diff --git a/src/plugins/select/cray/libalps/parser_common.c b/src/plugins/select/cray/libalps/parser_common.c
index 26c7283..fa12bde 100644
--- a/src/plugins/select/cray/libalps/parser_common.c
+++ b/src/plugins/select/cray/libalps/parser_common.c
@@ -44,7 +44,7 @@
*/
void extract_attributes(const XML_Char **attr_list, char **reqv, int reqc)
{
- const XML_Char **attr, *val;
+ const XML_Char **attr, *val = NULL;
while (--reqc >= 0) {
for (attr = attr_list, val = NULL; *attr; attr += 2)
@@ -57,6 +57,7 @@
if (val == NULL)
fatal("unspecified '%s' attribute", reqv[reqc]);
reqv[reqc] = (XML_Char *)val;
+ val = NULL;
}
}
@@ -487,7 +488,8 @@
[BV_3_1] = basil_3_1_elements,
[BV_4_0] = basil_4_0_elements,
[BV_4_1] = basil_4_0_elements,
- [BV_5_0] = basil_4_0_elements
+ [BV_5_0] = basil_4_0_elements,
+ [BV_5_1] = basil_4_0_elements
};
/**
diff --git a/src/plugins/select/cray/libemulate/Makefile.in b/src/plugins/select/cray/libemulate/Makefile.in
index 63058d7..2517629 100644
--- a/src/plugins/select/cray/libemulate/Makefile.in
+++ b/src/plugins/select/cray/libemulate/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -188,6 +188,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -224,6 +225,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -366,7 +368,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libalps.la: $(libalps_la_OBJECTS) $(libalps_la_DEPENDENCIES)
+libalps.la: $(libalps_la_OBJECTS) $(libalps_la_DEPENDENCIES) $(EXTRA_libalps_la_DEPENDENCIES)
$(libalps_la_LINK) $(libalps_la_OBJECTS) $(libalps_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -515,10 +517,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/cray/nodespec.c b/src/plugins/select/cray/nodespec.c
index f5ac145..3bf8364 100644
--- a/src/plugins/select/cray/nodespec.c
+++ b/src/plugins/select/cray/nodespec.c
@@ -1,6 +1,8 @@
/*
- * Strictly-ordered, singly-linked list to represent disjoint node ranges
- * of the type 'a' (single node) or 'a-b' (range, with a < b).
+ * Functions to maintain a list of numeric node ranges. Depending upon the
+ * parameter "sorted" used when adding elements, this list may be a strictly-
+ * ordered, singly-linked list to represent disjoint node ranges of the type
+ * 'a' (single node) or 'a-b' (range, with a < b).
*
* For example, '1,7-8,20,33-29'
*
@@ -11,39 +13,51 @@
#define CRAY_MAX_DIGITS 5 /* nid%05d format */
/* internal constructor */
-static struct nodespec *ns_new(uint32_t start, uint32_t end)
+static struct nodespec *_ns_new(uint32_t start, uint32_t end)
{
struct nodespec *new = xmalloc(sizeof(*new));
- if (new) {
- new->start = start;
- new->end = end;
- }
+ new->start = start;
+ new->end = end;
return new;
}
/**
- * ns_add_range - Insert/merge new range into existing nodespec list.
+ * _ns_add_range - Insert/merge new range into existing nodespec list.
* @head: head of the ordered list
* @new_start: start value of node range to insert
* @new_end: end value of node range to insert
+ * @sorted: if set, then maintain @head as duplicate-free list, ordered
+ * in ascending order of node-specifier intervals, with a gap of
+ * at least 2 between adjacent entries. Otherwise maintain @head
+ * as a list of elements in the order added
*
- * Maintains @head as duplicate-free list, ordered in ascending order of
- * node-specifier intervals, with a gap of at least 2 between adjacent entries.
+ * Maintains @head as a list.
* Returns 0 if ok, -1 on failure.
*/
-static int ns_add_range(struct nodespec **head,
- uint32_t new_start,
- uint32_t new_end)
+static int _ns_add_range(struct nodespec **head,
+ uint32_t new_start, uint32_t new_end, bool sorted)
{
struct nodespec *cur = *head, *next;
assert(new_start <= new_end);
+ if (!sorted) {
+ if (cur) {
+ while (cur->next) /* find end of list */
+ cur = cur->next;
+ if (new_start == (cur->end + 1))
+ cur->end = new_end;
+ else
+ cur->next = _ns_new(new_start, new_end);
+ } else {
+ *head = _ns_new(new_start, new_end);
+ }
+ return 0;
+ }
+
if (cur == NULL || new_end + 1 < cur->start) {
- *head = ns_new(new_start, new_end);
- if (*head == NULL)
- return -1;
+ *head = _ns_new(new_start, new_end);
(*head)->next = cur;
return 0;
}
@@ -52,9 +66,7 @@
new_start > cur->end + 1;
cur = next, next = cur->next)
if (next == NULL || new_end + 1 < next->start) {
- next = ns_new(new_start, new_end);
- if (next == NULL)
- return -1;
+ next = _ns_new(new_start, new_end);
next->next = cur->next;
cur->next = next;
return 0;
@@ -80,9 +92,9 @@
}
/** Add a single node (1-element range) */
-int ns_add_node(struct nodespec **head, uint32_t node_id)
+extern int ns_add_node(struct nodespec **head, uint32_t node_id, bool sorted)
{
- return ns_add_range(head, node_id, node_id);
+ return _ns_add_range(head, node_id, node_id, sorted);
}
/* count the number of nodes starting at @head */
diff --git a/src/plugins/select/cray/other_select.c b/src/plugins/select/cray/other_select.c
index 18ddd3a..dd2f63d 100644
--- a/src/plugins/select/cray/other_select.c
+++ b/src/plugins/select/cray/other_select.c
@@ -112,6 +112,7 @@
"select_p_select_jobinfo_xstrdup",
"select_p_update_block",
"select_p_update_sub_node",
+ "select_p_fail_cnode",
"select_p_get_info_from_plugin",
"select_p_update_node_config",
"select_p_update_node_state",
@@ -751,6 +752,20 @@
}
/*
+ * Fail certain cnodes in a block's midplane (usually comes from the
+ * IBM runjob mux)
+ * IN step_ptr - step that failed
+ */
+extern int other_fail_cnode (struct step_record *step_ptr)
+{
+ if (other_select_init() < 0)
+ return SLURM_ERROR;
+
+ return (*(other_select_context->ops.
+ fail_cnode))(step_ptr);
+}
+
+/*
* Get select data from a plugin
* IN dinfo - type of data to get from the node record
* (see enum select_plugindata_info)
diff --git a/src/plugins/select/cray/other_select.h b/src/plugins/select/cray/other_select.h
index ba2f746..083ef4d 100644
--- a/src/plugins/select/cray/other_select.h
+++ b/src/plugins/select/cray/other_select.h
@@ -102,6 +102,13 @@
extern int other_update_sub_node(update_block_msg_t *block_desc_ptr);
/*
+ * Fail certain cnodes in a block's midplane (usually comes from the
+ * IBM runjob mux)
+ * IN step_ptr - step that failed
+ */
+extern int other_fail_cnode (struct step_record *step_ptr);
+
+/*
* Get select data from a plugin
* IN node_pts - current node record
* IN dinfo - type of data to get from the node record
diff --git a/src/plugins/select/cray/parser_common.h b/src/plugins/select/cray/parser_common.h
index cb161b7..4ad70da 100644
--- a/src/plugins/select/cray/parser_common.h
+++ b/src/plugins/select/cray/parser_common.h
@@ -21,7 +21,8 @@
[BV_3_1] = "1.1",
[BV_4_0] = "1.2",
[BV_4_1] = "1.2",
- [BV_5_0] = "1.2"
+ [BV_5_0] = "1.2",
+ [BV_5_1] = "1.2"
};
const char *bv_names_long[BV_MAX] = { /* Actual version name */
@@ -31,7 +32,8 @@
[BV_3_1] = "3.1",
[BV_4_0] = "4.0",
[BV_4_1] = "4.1",
- [BV_5_0] = "5.0"
+ [BV_5_0] = "5.0",
+ [BV_5_1] = "5.1"
};
/* Basil methods */
diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c
index db7b76e..7b65bce 100644
--- a/src/plugins/select/cray/select_cray.c
+++ b/src/plugins/select/cray/select_cray.c
@@ -88,6 +88,10 @@
bitstr_t *avail_node_bitmap = NULL;
#endif
+#if !defined (SIGRTMIN) && defined(__NetBSD__)
+/* protected definition in <sys/signal.h> */
+# define SIGRTMIN (SIGPWR+1)
+#endif
/*
* SIGRTMIN isn't defined on osx, so lets keep it above the signals in use.
*/
@@ -135,6 +139,16 @@
uint32_t plugin_id = 104;
const uint32_t plugin_version = 100;
+static bool _zero_size_job ( struct job_record *job_ptr )
+{
+ xassert (job_ptr);
+ if (job_ptr->details &&
+ (job_ptr->details->min_nodes == 0) &&
+ (job_ptr->details->max_nodes == 0))
+ return true;
+ return false;
+}
+
/*
* init() is called when the plugin is loaded, before any other functions
* are called. Put global initialization here.
@@ -244,6 +258,11 @@
List preemptee_candidates,
List *preemptee_job_list)
{
+ if (min_nodes == 0) {
+ /* Allocate resources only on a front-end node */
+ job_ptr->details->min_cpus = 0;
+ }
+
return other_job_test(job_ptr, bitmap, min_nodes, max_nodes,
req_nodes, mode, preemptee_candidates,
preemptee_job_list);
@@ -253,7 +272,8 @@
{
xassert(job_ptr);
- if (do_basil_reserve(job_ptr) != SLURM_SUCCESS) {
+ if ((!_zero_size_job(job_ptr)) &&
+ (do_basil_reserve(job_ptr) != SLURM_SUCCESS)) {
job_ptr->state_reason = WAIT_RESOURCES;
xfree(job_ptr->state_desc);
return SLURM_ERROR;
@@ -275,7 +295,7 @@
* means that we need to confirm only if batch_flag is 0,
* and execute the other_job_ready() only in slurmctld.
*/
- if (!job_ptr->batch_flag)
+ if (!job_ptr->batch_flag && !_zero_size_job(job_ptr))
rc = do_basil_confirm(job_ptr);
if (rc != SLURM_SUCCESS || (job_ptr->job_state == (uint16_t)NO_VAL))
return rc;
@@ -328,8 +348,19 @@
do_basil_release(job_ptr);
}
- if (do_basil_signal(job_ptr, signal) != SLURM_SUCCESS)
- return SLURM_ERROR;
+ if (!_zero_size_job(job_ptr)) {
+ if (signal != SIGKILL) {
+ if (do_basil_signal(job_ptr, signal) != SLURM_SUCCESS)
+ return SLURM_ERROR;
+ } else {
+ uint16_t kill_wait = slurm_get_kill_wait();
+ if (do_basil_signal(job_ptr, SIGCONT) != SLURM_SUCCESS)
+ return SLURM_ERROR;
+ if (do_basil_signal(job_ptr, SIGTERM) != SLURM_SUCCESS)
+ return SLURM_ERROR;
+ queue_basil_signal(job_ptr, SIGKILL, kill_wait);
+ }
+ }
return other_job_signal(job_ptr, signal);
}
@@ -337,7 +368,8 @@
{
if (job_ptr == NULL)
return SLURM_SUCCESS;
- if (do_basil_release(job_ptr) != SLURM_SUCCESS)
+ if ((!_zero_size_job(job_ptr)) &&
+ (do_basil_release(job_ptr) != SLURM_SUCCESS))
return SLURM_ERROR;
/*
* Convention: like select_p_job_ready, may be called also from
@@ -354,7 +386,8 @@
if (job_ptr == NULL)
return SLURM_SUCCESS;
- if (do_basil_switch(job_ptr, 1) != SLURM_SUCCESS)
+ if ((!_zero_size_job(job_ptr)) &&
+ (do_basil_switch(job_ptr, 1) != SLURM_SUCCESS))
return SLURM_ERROR;
return other_job_suspend(job_ptr, indf_susp);
@@ -365,7 +398,8 @@
if (job_ptr == NULL)
return SLURM_SUCCESS;
- if (do_basil_switch(job_ptr, 0) != SLURM_SUCCESS)
+ if ((!_zero_size_job(job_ptr)) &&
+ (do_basil_switch(job_ptr, 0) != SLURM_SUCCESS))
return SLURM_ERROR;
return other_job_resume(job_ptr, indf_susp);
@@ -758,6 +792,11 @@
return other_update_sub_node(block_desc_ptr);
}
+extern int select_p_fail_cnode(struct step_record *step_ptr)
+{
+ return other_fail_cnode(step_ptr);
+}
+
extern int select_p_get_info_from_plugin(enum select_jobdata_type info,
struct job_record *job_ptr,
void *data)
diff --git a/src/plugins/select/linear/Makefile.in b/src/plugins/select/linear/Makefile.in
index 2d749ab..0b187e1 100644
--- a/src/plugins/select/linear/Makefile.in
+++ b/src/plugins/select/linear/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
select_linear_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-select_linear.la: $(select_linear_la_OBJECTS) $(select_linear_la_DEPENDENCIES)
+select_linear.la: $(select_linear_la_OBJECTS) $(select_linear_la_DEPENDENCIES) $(EXTRA_select_linear_la_DEPENDENCIES)
$(select_linear_la_LINK) -rpath $(pkglibdir) $(select_linear_la_OBJECTS) $(select_linear_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c
index 1d0d62e..5bca3c8 100644
--- a/src/plugins/select/linear/select_linear.c
+++ b/src/plugins/select/linear/select_linear.c
@@ -1967,7 +1967,8 @@
gres_list = node_ptr->gres_list;
gres_plugin_job_alloc(job_ptr->gres_list, gres_list,
node_cnt, node_offset, cpu_cnt,
- job_ptr->job_id, node_ptr->name);
+ job_ptr->job_id, node_ptr->name,
+ NULL);
gres_plugin_node_state_log(gres_list, node_ptr->name);
}
@@ -2234,7 +2235,8 @@
job_resrcs_ptr->
cpus[node_offset],
job_ptr->job_id,
- node_ptr->name);
+ node_ptr->name,
+ NULL);
}
part_cr_ptr = cr_ptr->nodes[i].parts;
@@ -2437,6 +2439,7 @@
(ListCmpF)_sort_usable_nodes_dec);
rc = EINVAL;
list_iterator_destroy(job_iterator);
+ _free_cr(exp_cr);
goto top;
}
}
@@ -3142,6 +3145,7 @@
{
int rc = SLURM_SUCCESS;
uint16_t *uint16 = (uint16_t *) data;
+ char **tmp_char = (char **) data;
select_nodeinfo_t **select_nodeinfo = (select_nodeinfo_t **) data;
if (nodeinfo == NULL) {
@@ -3167,6 +3171,10 @@
case SELECT_NODEDATA_PTR:
*select_nodeinfo = nodeinfo;
break;
+ case SELECT_NODEDATA_RACK_MP:
+ case SELECT_NODEDATA_EXTRA_INFO:
+ *tmp_char = NULL;
+ break;
default:
error("Unsupported option %d for get_nodeinfo.", dinfo);
rc = SLURM_ERROR;
@@ -3288,6 +3296,11 @@
return SLURM_SUCCESS;
}
+extern int select_p_fail_cnode(struct step_record *step_ptr)
+{
+ return SLURM_SUCCESS;
+}
+
extern int select_p_get_info_from_plugin (enum select_plugindata_info dinfo,
struct job_record *job_ptr,
void *data)
diff --git a/src/plugins/switch/Makefile.in b/src/plugins/switch/Makefile.in
index b078e12..7ed50aa 100644
--- a/src/plugins/switch/Makefile.in
+++ b/src/plugins/switch/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/switch/elan/Makefile.in b/src/plugins/switch/elan/Makefile.in
index 44d24fe..7176ba1 100644
--- a/src/plugins/switch/elan/Makefile.in
+++ b/src/plugins/switch/elan/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
am__DEPENDENCIES_1 =
@@ -214,6 +220,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -250,6 +257,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -425,7 +433,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-switch_elan.la: $(switch_elan_la_OBJECTS) $(switch_elan_la_DEPENDENCIES)
+switch_elan.la: $(switch_elan_la_OBJECTS) $(switch_elan_la_DEPENDENCIES) $(EXTRA_switch_elan_la_DEPENDENCIES)
$(switch_elan_la_LINK) $(am_switch_elan_la_rpath) $(switch_elan_la_OBJECTS) $(switch_elan_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -563,10 +571,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/switch/federation/Makefile.in b/src/plugins/switch/federation/Makefile.in
index d9d5955..bd92e58 100644
--- a/src/plugins/switch/federation/Makefile.in
+++ b/src/plugins/switch/federation/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
switch_federation_la_LIBADD =
@@ -217,6 +223,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -253,6 +260,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -427,7 +435,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-switch_federation.la: $(switch_federation_la_OBJECTS) $(switch_federation_la_DEPENDENCIES)
+switch_federation.la: $(switch_federation_la_OBJECTS) $(switch_federation_la_DEPENDENCIES) $(EXTRA_switch_federation_la_DEPENDENCIES)
$(switch_federation_la_LINK) $(am_switch_federation_la_rpath) $(switch_federation_la_OBJECTS) $(switch_federation_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -565,10 +573,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/switch/none/Makefile.in b/src/plugins/switch/none/Makefile.in
index 981d130..d56f183 100644
--- a/src/plugins/switch/none/Makefile.in
+++ b/src/plugins/switch/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
switch_none_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-switch_none.la: $(switch_none_la_OBJECTS) $(switch_none_la_DEPENDENCIES)
+switch_none.la: $(switch_none_la_OBJECTS) $(switch_none_la_DEPENDENCIES) $(EXTRA_switch_none_la_DEPENDENCIES)
$(switch_none_la_LINK) -rpath $(pkglibdir) $(switch_none_la_OBJECTS) $(switch_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/task/Makefile.in b/src/plugins/task/Makefile.in
index 6d0b8f0..3a24a68 100644
--- a/src/plugins/task/Makefile.in
+++ b/src/plugins/task/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/task/affinity/Makefile.in b/src/plugins/task/affinity/Makefile.in
index 31cb39b..47a96d5 100644
--- a/src/plugins/task/affinity/Makefile.in
+++ b/src/plugins/task/affinity/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
task_affinity_la_LIBADD =
@@ -220,6 +226,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -256,6 +263,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -430,7 +438,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-task_affinity.la: $(task_affinity_la_OBJECTS) $(task_affinity_la_DEPENDENCIES)
+task_affinity.la: $(task_affinity_la_OBJECTS) $(task_affinity_la_DEPENDENCIES) $(EXTRA_task_affinity_la_DEPENDENCIES)
$(task_affinity_la_LINK) $(am_task_affinity_la_rpath) $(task_affinity_la_OBJECTS) $(task_affinity_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -572,10 +580,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/task/affinity/task_affinity.c b/src/plugins/task/affinity/task_affinity.c
index 88d3db6..1e4ad92 100644
--- a/src/plugins/task/affinity/task_affinity.c
+++ b/src/plugins/task/affinity/task_affinity.c
@@ -244,11 +244,21 @@
* happening reliably. */
if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
char base[PATH_MAX], path[PATH_MAX];
+#ifdef MULTIPLE_SLURMD
+ if (snprintf(base, PATH_MAX, "%s/slurm_%s_%u",
+ CPUSET_DIR,
+ (conf->node_name != NULL)?conf->node_name:"",
+ job_id) > PATH_MAX) {
+ error("cpuset path too long");
+ return SLURM_ERROR;
+ }
+#else
if (snprintf(base, PATH_MAX, "%s/slurm%u",
CPUSET_DIR, job_id) > PATH_MAX) {
error("cpuset path too long");
return SLURM_ERROR;
}
+#endif
if (rmdir(base) && (errno == ENOTEMPTY)) {
DIR *dirp;
struct dirent entry;
@@ -294,11 +304,21 @@
if (!(conf->task_plugin_param & CPU_BIND_CPUSETS))
return SLURM_SUCCESS;
+#ifdef MULTIPLE_SLURMD
+ if (snprintf(path, PATH_MAX, "%s/slurm_%s_%u",
+ CPUSET_DIR,
+ (conf->node_name != NULL)?conf->node_name:"",
+ job->jobid) > PATH_MAX) {
+ error("cpuset path too long");
+ return SLURM_ERROR;
+ }
+#else
if (snprintf(path, PATH_MAX, "%s/slurm%u",
CPUSET_DIR, job->jobid) > PATH_MAX) {
error("cpuset path too long");
return SLURM_ERROR;
}
+#endif
return slurm_build_cpuset(CPUSET_DIR, path, job->uid, job->gid);
}
@@ -318,11 +338,21 @@
if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
info("Using cpuset affinity for tasks");
+#ifdef MULTIPLE_SLURMD
+ if (snprintf(base, PATH_MAX, "%s/slurm_%s_%u",
+ CPUSET_DIR,
+ (conf->node_name != NULL)?conf->node_name:"",
+ job->jobid) > PATH_MAX) {
+ error("cpuset path too long");
+ return SLURM_ERROR;
+ }
+#else
if (snprintf(base, PATH_MAX, "%s/slurm%u",
CPUSET_DIR, job->jobid) > PATH_MAX) {
error("cpuset path too long");
return SLURM_ERROR;
}
+#endif
if (snprintf(path, PATH_MAX, "%s/slurm%u.%u_%d",
base, job->jobid, job->stepid,
job->envtp->localid) > PATH_MAX) {
@@ -416,11 +446,21 @@
* happening reliably. */
if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
char base[PATH_MAX], path[PATH_MAX];
+#ifdef MULTIPLE_SLURMD
+ if (snprintf(base, PATH_MAX, "%s/slurm_%s_%u",
+ CPUSET_DIR,
+ (conf->node_name != NULL)?conf->node_name:"",
+ job->jobid) > PATH_MAX) {
+ error("cpuset path too long");
+ return SLURM_ERROR;
+ }
+#else
if (snprintf(base, PATH_MAX, "%s/slurm%u",
CPUSET_DIR, job->jobid) > PATH_MAX) {
error("cpuset path too long");
return SLURM_ERROR;
}
+#endif
if (snprintf(path, PATH_MAX, "%s/slurm%u.%u_%d",
base, job->jobid, job->stepid,
job->envtp->localid) > PATH_MAX) {
diff --git a/src/plugins/task/cgroup/Makefile.in b/src/plugins/task/cgroup/Makefile.in
index 7fbea49..d7874e8 100644
--- a/src/plugins/task/cgroup/Makefile.in
+++ b/src/plugins/task/cgroup/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
task_cgroup_la_LIBADD =
@@ -211,6 +217,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -247,6 +254,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -412,7 +420,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-task_cgroup.la: $(task_cgroup_la_OBJECTS) $(task_cgroup_la_DEPENDENCIES)
+task_cgroup.la: $(task_cgroup_la_OBJECTS) $(task_cgroup_la_DEPENDENCIES) $(EXTRA_task_cgroup_la_DEPENDENCIES)
$(task_cgroup_la_LINK) -rpath $(pkglibdir) $(task_cgroup_la_OBJECTS) $(task_cgroup_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -580,10 +588,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/task/cgroup/task_cgroup.c b/src/plugins/task/cgroup/task_cgroup.c
index 3bea7f4..b7a4300 100644
--- a/src/plugins/task/cgroup/task_cgroup.c
+++ b/src/plugins/task/cgroup/task_cgroup.c
@@ -106,7 +106,11 @@
/* enable subsystems based on conf */
if (slurm_cgroup_conf.constrain_cores) {
use_cpuset = true;
- task_cgroup_cpuset_init(&slurm_cgroup_conf);
+ if (task_cgroup_cpuset_init(&slurm_cgroup_conf) !=
+ SLURM_SUCCESS) {
+ free_slurm_cgroup_conf(&slurm_cgroup_conf);
+ return SLURM_ERROR;
+ }
debug("%s: now constraining jobs allocated cores",
plugin_type);
}
@@ -114,7 +118,11 @@
if (slurm_cgroup_conf.constrain_ram_space ||
slurm_cgroup_conf.constrain_swap_space) {
use_memory = true;
- task_cgroup_memory_init(&slurm_cgroup_conf);
+ if (task_cgroup_memory_init(&slurm_cgroup_conf) !=
+ SLURM_SUCCESS) {
+ free_slurm_cgroup_conf(&slurm_cgroup_conf);
+ return SLURM_ERROR;
+ }
debug("%s: now constraining jobs allocated memory",
plugin_type);
}
diff --git a/src/plugins/task/cgroup/task_cgroup_cpuset.c b/src/plugins/task/cgroup/task_cgroup_cpuset.c
index 78df78a..8fad793 100644
--- a/src/plugins/task/cgroup/task_cgroup_cpuset.c
+++ b/src/plugins/task/cgroup/task_cgroup_cpuset.c
@@ -3,6 +3,8 @@
*****************************************************************************
* Copyright (C) 2009 CEA/DAM/DIF
* Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ * Portions copyright (C) 2012 Bull
+ * Written by Martin Perry <martin.perry@bull.com>
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.schedmd.com/slurmdocs/>.
@@ -47,16 +49,48 @@
#include "src/slurmd/slurmstepd/slurmstepd_job.h"
#include "src/slurmd/slurmd/slurmd.h"
+#include "src/common/bitstring.h"
#include "src/common/xstring.h"
#include "src/common/xcgroup_read_config.h"
#include "src/common/xcgroup.h"
-#include "src/common/xcpuinfo.h"
#include "task_cgroup.h"
#ifdef HAVE_HWLOC
#include <hwloc.h>
#include <hwloc/glibc-sched.h>
+
+# if HWLOC_API_VERSION <= 0x00010000
+/* After this version the cpuset structure and all it's functions
+ * changed to bitmaps. So to work with old hwloc's we just to the
+ * opposite to avoid having to put a bunch of ifdef's in the code we
+ * just do it here.
+ */
+typedef hwloc_cpuset_t hwloc_bitmap_t;
+
+static inline hwloc_bitmap_t hwloc_bitmap_alloc(void)
+{
+ return hwloc_cpuset_alloc();
+}
+
+static inline void hwloc_bitmap_free(hwloc_bitmap_t bitmap)
+{
+ hwloc_cpuset_free(bitmap);
+}
+
+static inline void hwloc_bitmap_or(
+ hwloc_bitmap_t res, hwloc_bitmap_t bitmap1, hwloc_bitmap_t bitmap2)
+{
+ hwloc_cpuset_or(res, bitmap1, bitmap2);
+}
+
+static inline int hwloc_bitmap_asprintf(char **str, hwloc_bitmap_t bitmap)
+{
+ return hwloc_cpuset_asprintf(str, bitmap);
+}
+
+# endif
+
#endif
#ifndef PATH_MAX
@@ -75,15 +109,289 @@
static int _xcgroup_cpuset_init(xcgroup_t* cg);
+/*
+ * convert abstract range into the machine one
+ */
+static int _abs_to_mac(char* lrange, char** prange)
+{
+ static int total_cores = -1, total_cpus = -1;
+ bitstr_t* absmap = NULL;
+ bitstr_t* macmap = NULL;
+ int icore, ithread;
+ int absid, macid;
+ int rc = SLURM_SUCCESS;
+
+ if (total_cores == -1) {
+ total_cores = conf->sockets * conf->cores;
+ total_cpus = conf->block_map_size;
+ }
+
+ /* allocate bitmap */
+ absmap = bit_alloc(total_cores);
+ macmap = bit_alloc(total_cpus);
+
+ if (!absmap || !macmap) {
+ rc = SLURM_ERROR;
+ goto end_it;
+ }
+
+ /* string to bitmap conversion */
+ if (bit_unfmt(absmap, lrange)) {
+ rc = SLURM_ERROR;
+ goto end_it;
+ }
+
+ /* mapping abstract id to machine id using conf->block_map */
+ for (icore = 0; icore < total_cores; icore++) {
+ if (bit_test(absmap, icore)) {
+ for (ithread = 0; ithread<conf->threads; ithread++) {
+ absid = icore*conf->threads + ithread;
+ absid %= total_cpus;
+
+ macid = conf->block_map[absid];
+ macid %= total_cpus;
+
+ bit_set(macmap, macid);
+ }
+ }
+ }
+
+ /* convert machine cpu bitmap to range string */
+ *prange = (char*)xmalloc(total_cpus*6);
+ bit_fmt(*prange, total_cpus*6, macmap);
+
+ /* free unused bitmaps */
+end_it:
+ FREE_NULL_BITMAP(absmap);
+ FREE_NULL_BITMAP(macmap);
+
+ if (rc != SLURM_SUCCESS)
+ info("_abs_to_mac failed");
+
+ return rc;
+}
+
+/* when cgroups are configured with cpuset, at least
+ * cpuset.cpus and cpuset.mems must be set or the cgroup
+ * will not be available at all.
+ * we duplicate the ancestor configuration in the init step */
+static int _xcgroup_cpuset_init(xcgroup_t* cg)
+{
+ int fstatus,i;
+
+ char* cpuset_metafiles[] = {
+ "cpuset.cpus",
+ "cpuset.mems"
+ };
+ char* cpuset_meta;
+ char* cpuset_conf;
+ size_t csize;
+
+ xcgroup_t acg;
+ char* acg_name;
+ char* p;
+
+ fstatus = XCGROUP_ERROR;
+
+ /* load ancestor cg */
+ acg_name = (char*) xstrdup(cg->name);
+ p = rindex(acg_name,'/');
+ if (p == NULL) {
+ debug2("task/cgroup: unable to get ancestor path for "
+ "cpuset cg '%s' : %m",cg->path);
+ return fstatus;
+ } else
+ *p = '\0';
+ if (xcgroup_load(cg->ns,&acg, acg_name) != XCGROUP_SUCCESS) {
+ debug2("task/cgroup: unable to load ancestor for "
+ "cpuset cg '%s' : %m",cg->path);
+ return fstatus;
+ }
+
+ /* inherits ancestor params */
+ for (i = 0 ; i < 2 ; i++) {
+ cpuset_meta = cpuset_metafiles[i];
+ if (xcgroup_get_param(&acg,cpuset_meta,
+ &cpuset_conf,&csize)
+ != XCGROUP_SUCCESS) {
+ debug2("task/cgroup: assuming no cpuset cg "
+ "support for '%s'",acg.path);
+ xcgroup_destroy(&acg);
+ return fstatus;
+ }
+ if (csize > 0)
+ cpuset_conf[csize-1]='\0';
+ if (xcgroup_set_param(cg,cpuset_meta,cpuset_conf)
+ != XCGROUP_SUCCESS) {
+ debug2("task/cgroup: unable to write %s configuration "
+ "(%s) for cpuset cg '%s'",cpuset_meta,
+ cpuset_conf,cg->path);
+ xcgroup_destroy(&acg);
+ xfree(cpuset_conf);
+ return fstatus;
+ }
+ xfree(cpuset_conf);
+ }
+
+ xcgroup_destroy(&acg);
+ return XCGROUP_SUCCESS;
+}
+
+#ifdef HAVE_HWLOC
+
+/*
+ * Add cpuset for an object to the total cpuset for a task, using the
+ * appropriate ancestor object cpuset if necessary
+ *
+ * obj = object to add
+ * cpuset = cpuset for task
+ */
+static void _add_cpuset(
+ hwloc_obj_type_t hwtype, hwloc_obj_type_t req_hwtype,
+ hwloc_obj_t obj, uint32_t taskid, int bind_verbose,
+ hwloc_bitmap_t cpuset)
+{
+ struct hwloc_obj *pobj;
+
+ /* if requested binding overlap the granularity */
+ /* use the ancestor cpuset instead of the object one */
+ if (hwloc_compare_types(hwtype,req_hwtype) > 0) {
+
+ /* Get the parent object of req_hwtype or the */
+ /* one just above if not found (meaning of >0)*/
+ /* (useful for ldoms binding with !NUMA nodes)*/
+ pobj = obj->parent;
+ while (pobj != NULL &&
+ hwloc_compare_types(pobj->type, req_hwtype) > 0)
+ pobj = pobj->parent;
+
+ if (pobj != NULL) {
+ if (bind_verbose)
+ info("task/cgroup: task[%u] higher level %s "
+ "found", taskid,
+ hwloc_obj_type_string(pobj->type));
+ hwloc_bitmap_or(cpuset, cpuset, pobj->allowed_cpuset);
+ } else {
+ /* should not be executed */
+ if (bind_verbose)
+ info("task/cgroup: task[%u] no higher level "
+ "found", taskid);
+ hwloc_bitmap_or(cpuset, cpuset, obj->allowed_cpuset);
+ }
+
+ } else
+ hwloc_bitmap_or(cpuset, cpuset, obj->allowed_cpuset);
+}
+
+/*
+ * Distribute cpus to the task using cyclic distribution across sockets
+ */
+static int _task_cgroup_cpuset_dist_cyclic(
+ hwloc_topology_t topology, hwloc_obj_type_t hwtype,
+ hwloc_obj_type_t req_hwtype, slurmd_job_t *job, int bind_verbose,
+ hwloc_bitmap_t cpuset)
+{
+ hwloc_obj_t obj;
+ uint32_t *obj_idx;
+ uint32_t i, sock_idx, npskip, npdist, nsockets;
+ uint32_t taskid = job->envtp->localid;
+
+ if (bind_verbose)
+ info("task/cgroup: task[%u] using cyclic distribution, "
+ "task_dist %u", taskid, job->task_dist);
+ nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
+ HWLOC_OBJ_SOCKET);
+ obj_idx = xmalloc(nsockets * sizeof(uint32_t));
+
+ if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) {
+ /* cores or threads granularity */
+ npskip = taskid * job->cpus_per_task;
+ npdist = job->cpus_per_task;
+ } else {
+ /* sockets or ldoms granularity */
+ npskip = taskid;
+ npdist = 1;
+ }
+
+ /* skip objs for lower taskids */
+ i = 0;
+ sock_idx = 0;
+ while (i < npskip) {
+ while ((sock_idx < nsockets) && (i < npskip)) {
+ obj = hwloc_get_obj_below_by_type(
+ topology, HWLOC_OBJ_SOCKET, sock_idx,
+ hwtype, obj_idx[sock_idx]);
+ if (obj != NULL) {
+ obj_idx[sock_idx]++;
+ i++;
+ }
+ sock_idx++;
+ }
+ if (i < npskip)
+ sock_idx = 0;
+ }
+
+ /* distribute objs cyclically across sockets */
+ i = npdist;
+ while (i > 0) {
+ while ((sock_idx < nsockets) && (i > 0)) {
+ obj = hwloc_get_obj_below_by_type(
+ topology, HWLOC_OBJ_SOCKET, sock_idx,
+ hwtype, obj_idx[sock_idx]);
+ if (obj != NULL) {
+ obj_idx[sock_idx]++;
+ _add_cpuset(hwtype, req_hwtype, obj, taskid,
+ bind_verbose, cpuset);
+ i--;
+ }
+ sock_idx++;
+ }
+ sock_idx = 0;
+ }
+ xfree(obj_idx);
+ return XCGROUP_SUCCESS;
+}
+
+/*
+ * Distribute cpus to the task using block distribution
+ */
+static int _task_cgroup_cpuset_dist_block(
+ hwloc_topology_t topology, hwloc_obj_type_t hwtype,
+ hwloc_obj_type_t req_hwtype, uint32_t nobj,
+ slurmd_job_t *job, int bind_verbose, hwloc_bitmap_t cpuset)
+{
+ hwloc_obj_t obj;
+ uint32_t i, pfirst,plast;
+ uint32_t taskid = job->envtp->localid;
+ int hwdepth;
+
+ if (bind_verbose)
+ info("task/cgroup: task[%u] using block distribution, "
+ "task_dist %u", taskid, job->task_dist);
+ if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) {
+ /* cores or threads granularity */
+ pfirst = taskid * job->cpus_per_task ;
+ plast = pfirst + job->cpus_per_task - 1;
+ } else {
+ /* sockets or ldoms granularity */
+ pfirst = taskid;
+ plast = pfirst;
+ }
+ hwdepth = hwloc_get_type_depth(topology,hwtype);
+ for (i = pfirst; i <= plast && i < nobj ; i++) {
+ obj = hwloc_get_obj_by_depth(topology, hwdepth, (int)i);
+ _add_cpuset(hwtype, req_hwtype, obj, taskid, bind_verbose,
+ cpuset);
+ }
+ return XCGROUP_SUCCESS;
+}
+
+#endif
+
extern int task_cgroup_cpuset_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
{
char release_agent_path[PATH_MAX];
- /* initialize cpuinfo internal data */
- if (xcpuinfo_init() != XCPUINFO_SUCCESS) {
- return SLURM_ERROR;
- }
-
/* initialize user/job/jobstep cgroup relative paths */
user_cgroup_path[0]='\0';
job_cgroup_path[0]='\0';
@@ -92,13 +400,13 @@
/* initialize cpuset cgroup namespace */
release_agent_path[0]='\0';
if (snprintf(release_agent_path,PATH_MAX,"%s/release_cpuset",
- slurm_cgroup_conf->cgroup_release_agent) >= PATH_MAX) {
+ slurm_cgroup_conf->cgroup_release_agent) >= PATH_MAX) {
error("task/cgroup: unable to build cpuset release agent path");
goto error;
}
if (xcgroup_ns_create(slurm_cgroup_conf, &cpuset_ns, "/cpuset", "",
- "cpuset",release_agent_path) !=
- XCGROUP_SUCCESS) {
+ "cpuset",release_agent_path) !=
+ XCGROUP_SUCCESS) {
error("task/cgroup: unable to create cpuset namespace");
goto error;
}
@@ -108,7 +416,7 @@
if (slurm_cgroup_conf->cgroup_automount) {
if (xcgroup_ns_mount(&cpuset_ns)) {
error("task/cgroup: unable to mount cpuset "
- "namespace");
+ "namespace: %s", slurm_strerror(errno));
goto clean;
}
info("task/cgroup: cpuset namespace is now mounted");
@@ -125,7 +433,6 @@
xcgroup_ns_destroy(&cpuset_ns);
error:
- xcpuinfo_fini();
return SLURM_ERROR;
}
@@ -145,7 +452,6 @@
xcgroup_ns_destroy(&cpuset_ns);
- xcpuinfo_fini();
return SLURM_SUCCESS;
}
@@ -210,7 +516,7 @@
/* build job cgroup relative path if no set (should not be) */
if (*job_cgroup_path == '\0') {
if (snprintf(job_cgroup_path,PATH_MAX,"%s/job_%u",
- user_cgroup_path,jobid) >= PATH_MAX) {
+ user_cgroup_path,jobid) >= PATH_MAX) {
error("task/cgroup: unable to build job %u cpuset "
"cg relative path : %m",jobid);
return SLURM_ERROR;
@@ -229,7 +535,8 @@
return SLURM_ERROR;
}
} else {
- if (snprintf(jobstep_cgroup_path, PATH_MAX, "%s/step_%u",
+ if (snprintf(jobstep_cgroup_path,
+ PATH_MAX, "%s/step_%u",
job_cgroup_path, stepid) >= PATH_MAX) {
error("task/cgroup: unable to build job step"
" %u.%u cpuset cg relative path: %m",
@@ -268,27 +575,27 @@
job->job_alloc_cores);
debug("task/cgroup: step abstract cores are '%s'",
job->step_alloc_cores);
- if (xcpuinfo_abs_to_mac(job->job_alloc_cores,
- &job_alloc_cores) != XCPUINFO_SUCCESS) {
+ if (_abs_to_mac(job->job_alloc_cores,
+ &job_alloc_cores) != SLURM_SUCCESS) {
error("task/cgroup: unable to build job physical cores");
goto error;
}
- if (xcpuinfo_abs_to_mac(job->step_alloc_cores,
- &step_alloc_cores) != XCPUINFO_SUCCESS) {
+ if (_abs_to_mac(job->step_alloc_cores,
+ &step_alloc_cores) != SLURM_SUCCESS) {
error("task/cgroup: unable to build step physical cores");
goto error;
}
debug("task/cgroup: job physical cores are '%s'",
- job->job_alloc_cores);
+ job_alloc_cores);
debug("task/cgroup: step physical cores are '%s'",
- job->step_alloc_cores);
+ step_alloc_cores);
/*
* create user cgroup in the cpuset ns (it could already exist)
*/
if (xcgroup_create(&cpuset_ns,&user_cpuset_cg,
- user_cgroup_path,
- getuid(),getgid()) != XCGROUP_SUCCESS) {
+ user_cgroup_path,
+ getuid(),getgid()) != XCGROUP_SUCCESS) {
goto error;
}
if (xcgroup_instanciate(&user_cpuset_cg) != XCGROUP_SUCCESS) {
@@ -303,7 +610,7 @@
if (rc != XCGROUP_SUCCESS || cpus_size == 1) {
/* initialize the cpusets as it was inexistant */
if (_xcgroup_cpuset_init(&user_cpuset_cg) !=
- XCGROUP_SUCCESS) {
+ XCGROUP_SUCCESS) {
xcgroup_delete(&user_cpuset_cg);
xcgroup_destroy(&user_cpuset_cg);
goto error;
@@ -322,8 +629,8 @@
* create job cgroup in the cpuset ns (it could already exist)
*/
if (xcgroup_create(&cpuset_ns,&job_cpuset_cg,
- job_cgroup_path,
- getuid(),getgid()) != XCGROUP_SUCCESS) {
+ job_cgroup_path,
+ getuid(),getgid()) != XCGROUP_SUCCESS) {
xcgroup_destroy(&user_cpuset_cg);
goto error;
}
@@ -345,8 +652,8 @@
* the user inside the step cgroup owned by root
*/
if (xcgroup_create(&cpuset_ns,&step_cpuset_cg,
- jobstep_cgroup_path,
- uid,gid) != XCGROUP_SUCCESS) {
+ jobstep_cgroup_path,
+ uid,gid) != XCGROUP_SUCCESS) {
/* do not delete user/job cgroup as */
/* they can exist for other steps */
xcgroup_destroy(&user_cpuset_cg);
@@ -412,33 +719,24 @@
return fstatus;
#else
- uint32_t i;
+ hwloc_obj_type_t socket_or_node;
uint32_t nldoms;
uint32_t nsockets;
uint32_t ncores;
uint32_t npus;
uint32_t nobj;
-
- uint32_t pfirst,plast;
uint32_t taskid = job->envtp->localid;
uint32_t jntasks = job->node_tasks;
uint32_t jnpus = jntasks * job->cpus_per_task;
pid_t pid = job->envtp->task_pid;
cpu_bind_type_t bind_type;
- int verbose;
+ int bind_verbose = 0;
hwloc_topology_t topology;
-#if HWLOC_API_VERSION <= 0x00010000
- hwloc_cpuset_t cpuset,ct;
-#else
- hwloc_bitmap_t cpuset,ct;
-#endif
- hwloc_obj_t obj;
- struct hwloc_obj *pobj;
+ hwloc_bitmap_t cpuset;
hwloc_obj_type_t hwtype;
hwloc_obj_type_t req_hwtype;
- int hwdepth;
size_t tssize;
cpu_set_t ts;
@@ -446,48 +744,56 @@
bind_type = job->cpu_bind_type ;
if (conf->task_plugin_param & CPU_BIND_VERBOSE ||
bind_type & CPU_BIND_VERBOSE)
- verbose = 1 ;
+ bind_verbose = 1 ;
+
+ /* Allocate and initialize hwloc objects */
+ hwloc_topology_init(&topology);
+
+ cpuset = hwloc_bitmap_alloc();
+
+ hwloc_topology_load(topology);
+ if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
+ hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) {
+ /* One socket contains multiple NUMA-nodes
+ * like AMD Opteron 6000 series etc.
+ * In such case, use NUMA-node instead of socket. */
+ socket_or_node = HWLOC_OBJ_NODE;
+ } else {
+ socket_or_node = HWLOC_OBJ_SOCKET;
+ }
if (bind_type & CPU_BIND_NONE) {
- if (verbose)
+ if (bind_verbose)
info("task/cgroup: task[%u] is requesting no affinity",
taskid);
return 0;
} else if (bind_type & CPU_BIND_TO_THREADS) {
- if (verbose)
+ if (bind_verbose)
info("task/cgroup: task[%u] is requesting "
"thread level binding",taskid);
req_hwtype = HWLOC_OBJ_PU;
} else if (bind_type & CPU_BIND_TO_CORES) {
- if (verbose)
+ if (bind_verbose)
info("task/cgroup: task[%u] is requesting "
"core level binding",taskid);
req_hwtype = HWLOC_OBJ_CORE;
} else if (bind_type & CPU_BIND_TO_SOCKETS) {
- if (verbose)
+ if (bind_verbose)
info("task/cgroup: task[%u] is requesting "
"socket level binding",taskid);
- req_hwtype = HWLOC_OBJ_SOCKET;
+ req_hwtype = socket_or_node;
} else if (bind_type & CPU_BIND_TO_LDOMS) {
- if (verbose)
+ if (bind_verbose)
info("task/cgroup: task[%u] is requesting "
"ldom level binding",taskid);
req_hwtype = HWLOC_OBJ_NODE;
} else {
- if (verbose)
+ if (bind_verbose)
info("task/cgroup: task[%u] using core level binding"
" by default",taskid);
req_hwtype = HWLOC_OBJ_CORE;
}
- /* Allocate and initialize hwloc objects */
- hwloc_topology_init(&topology);
-#if HWLOC_API_VERSION <= 0x00010000
- cpuset = hwloc_cpuset_alloc() ;
-#else
- cpuset = hwloc_bitmap_alloc() ;
-#endif
-
/*
* Perform the topology detection. It will only get allowed PUs.
* Detect in the same time the granularity to use for binding.
@@ -504,15 +810,15 @@
* to dispatch the tasks across the sockets and then provide access
* to each task to the cores of its socket.)
*/
- hwloc_topology_load(topology);
npus = (uint32_t) hwloc_get_nbobjs_by_type(topology,
HWLOC_OBJ_PU);
ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology,
HWLOC_OBJ_CORE);
nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
- HWLOC_OBJ_SOCKET);
+ socket_or_node);
nldoms = (uint32_t) hwloc_get_nbobjs_by_type(topology,
HWLOC_OBJ_NODE);
+
hwtype = HWLOC_OBJ_MACHINE;
nobj = 1;
if (npus >= jnpus || bind_type & CPU_BIND_TO_THREADS) {
@@ -524,8 +830,8 @@
nobj = ncores;
}
if (nsockets >= jntasks &&
- bind_type & CPU_BIND_TO_SOCKETS) {
- hwtype = HWLOC_OBJ_SOCKET;
+ bind_type & CPU_BIND_TO_SOCKETS) {
+ hwtype = socket_or_node;
nobj = nsockets;
}
/*
@@ -536,15 +842,16 @@
* we have as many sockets as ldoms before moving to ldoms granularity
*/
if (nldoms >= jntasks &&
- nsockets >= nldoms &&
- bind_type & CPU_BIND_TO_LDOMS) {
+ nsockets >= nldoms &&
+ bind_type & CPU_BIND_TO_LDOMS) {
hwtype = HWLOC_OBJ_NODE;
nobj = nldoms;
}
/*
- * Perform a block binding on the detected object respecting the
- * granularity.
+ * Bind the detected object to the taskid, respecting the
+ * granularity, using the designated or default distribution
+ * method (block or cyclic).
* If not enough objects to do the job, revert to no affinity mode
*/
if (hwloc_compare_types(hwtype,HWLOC_OBJ_MACHINE) == 0) {
@@ -553,109 +860,80 @@
"granularity",taskid,hwloc_obj_type_string(hwtype));
} else if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0 &&
- jnpus > nobj) {
+ jnpus > nobj) {
info("task/cgroup: task[%u] not enough %s objects, disabling "
"affinity",taskid,hwloc_obj_type_string(hwtype));
} else {
+ char *str;
- if (verbose) {
+ if (bind_verbose) {
info("task/cgroup: task[%u] using %s granularity",
taskid,hwloc_obj_type_string(hwtype));
}
- if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) {
- /* cores or threads granularity */
- pfirst = taskid * job->cpus_per_task ;
- plast = pfirst + job->cpus_per_task - 1;
- } else {
- /* sockets or ldoms granularity */
- pfirst = taskid;
- plast = pfirst;
+
+ /* There are two "distributions," controlled by the
+ * -m option of srun and friends. The first is the
+ * distribution of tasks to nodes. The second is the
+ * distribution of allocated cpus to tasks for
+ * binding. This code is handling the second
+ * distribution. Here's how the values get set, based
+ * on the value of -m
+ *
+ * SLURM_DIST_CYCLIC = srun -m cyclic
+ * SLURM_DIST_BLOCK = srun -m block
+ * SLURM_DIST_CYCLIC_CYCLIC = srun -m cyclic:cyclic
+ * SLURM_DIST_BLOCK_CYCLIC = srun -m block:cyclic
+ *
+ * In the first two cases, the user only specified the
+ * first distribution. The second distribution
+ * defaults to cyclic. In the second two cases, the
+ * user explicitly requested a second distribution of
+ * cyclic. So all these four cases correspond to a
+ * second distribution of cyclic. So we want to call
+ * _task_cgroup_cpuset_dist_cyclic.
+ *
+ * If the user explicitly specifies a second
+ * distribution of block, or if
+ * CR_CORE_DEFAULT_DIST_BLOCK is configured and the
+ * user does not explicitly specify a second
+ * distribution of cyclic, the second distribution is
+ * block, and we need to call
+ * _task_cgroup_cpuset_dist_block. In these cases,
+ * task_dist would be set to SLURM_DIST_CYCLIC_BLOCK
+ * or SLURM_DIST_BLOCK_BLOCK.
+ *
+ * You can see the equivalent code for the
+ * task/affinity plugin in
+ * src/plugins/task/affinity/dist_tasks.c, around line 384.
+ */
+ switch (job->task_dist) {
+ case SLURM_DIST_CYCLIC:
+ case SLURM_DIST_BLOCK:
+ case SLURM_DIST_CYCLIC_CYCLIC:
+ case SLURM_DIST_BLOCK_CYCLIC:
+ _task_cgroup_cpuset_dist_cyclic(
+ topology, hwtype, req_hwtype,
+ job, bind_verbose, cpuset);
+ break;
+ default:
+ _task_cgroup_cpuset_dist_block(
+ topology, hwtype, req_hwtype,
+ nobj, job, bind_verbose, cpuset);
}
- hwdepth = hwloc_get_type_depth(topology,hwtype);
- for (i = pfirst; i <= plast && i < nobj ; i++) {
- obj = hwloc_get_obj_by_depth(topology,hwdepth,(int)i);
+ hwloc_bitmap_asprintf(&str, cpuset);
- /* if requested binding overlap the granularity */
- /* use the ancestor cpuset instead of the object one */
- if (hwloc_compare_types(hwtype,req_hwtype) > 0) {
-
- /* Get the parent object of req_hwtype or the */
- /* one just above if not found (meaning of >0)*/
- /* (useful for ldoms binding with !NUMA nodes)*/
- pobj = obj->parent;
- while (pobj != NULL &&
- hwloc_compare_types(pobj->type,
- req_hwtype) > 0)
- pobj = pobj->parent;
-
- if (pobj != NULL) {
- if (verbose)
- info("task/cgroup: task[%u] "
- "higher level %s found",
- taskid,
- hwloc_obj_type_string(
- pobj->type));
-#if HWLOC_API_VERSION <= 0x00010000
- ct = hwloc_cpuset_dup(pobj->
- allowed_cpuset);
- hwloc_cpuset_or(cpuset,cpuset,ct);
- hwloc_cpuset_free(ct);
-#else
- ct = hwloc_bitmap_dup(pobj->
- allowed_cpuset);
- hwloc_bitmap_or(cpuset,cpuset,ct);
- hwloc_bitmap_free(ct);
-#endif
- } else {
- /* should not be executed */
- if (verbose)
- info("task/cgroup: task[%u] "
- "no higher level found",
- taskid);
-#if HWLOC_API_VERSION <= 0x00010000
- ct = hwloc_cpuset_dup(obj->
- allowed_cpuset);
- hwloc_cpuset_or(cpuset,cpuset,ct);
- hwloc_cpuset_free(ct);
-#else
- ct = hwloc_bitmap_dup(obj->
- allowed_cpuset);
- hwloc_bitmap_or(cpuset,cpuset,ct);
- hwloc_bitmap_free(ct);
-#endif
- }
-
- } else {
-#if HWLOC_API_VERSION <= 0x00010000
- ct = hwloc_cpuset_dup(obj->allowed_cpuset);
- hwloc_cpuset_or(cpuset,cpuset,ct);
- hwloc_cpuset_free(ct);
-#else
- ct = hwloc_bitmap_dup(obj->allowed_cpuset);
- hwloc_bitmap_or(cpuset,cpuset,ct);
- hwloc_bitmap_free(ct);
-#endif
- }
- }
-
- char *str;
-#if HWLOC_API_VERSION <= 0x00010000
- hwloc_cpuset_asprintf(&str,cpuset);
-#else
- hwloc_bitmap_asprintf(&str,cpuset);
-#endif
tssize = sizeof(cpu_set_t);
if (hwloc_cpuset_to_glibc_sched_affinity(topology,cpuset,
- &ts,tssize) == 0) {
+ &ts,tssize) == 0) {
fstatus = SLURM_SUCCESS;
if (sched_setaffinity(pid,tssize,&ts)) {
error("task/cgroup: task[%u] unable to set "
"taskset '%s'",taskid,str);
fstatus = SLURM_ERROR;
- } else if (verbose) {
+ } else if (bind_verbose) {
info("task/cgroup: task[%u] taskset '%s' is set"
,taskid,str);
}
@@ -665,85 +943,14 @@
fstatus = SLURM_ERROR;
}
free(str);
-
}
/* Destroy hwloc objects */
-#if HWLOC_API_VERSION <= 0x00010000
- hwloc_cpuset_free(cpuset);
-#else
hwloc_bitmap_free(cpuset);
-#endif
+
hwloc_topology_destroy(topology);
return fstatus;
#endif
}
-
-
-/* when cgroups are configured with cpuset, at least
- * cpuset.cpus and cpuset.mems must be set or the cgroup
- * will not be available at all.
- * we duplicate the ancestor configuration in the init step */
-static int _xcgroup_cpuset_init(xcgroup_t* cg)
-{
- int fstatus,i;
-
- char* cpuset_metafiles[] = {
- "cpuset.cpus",
- "cpuset.mems"
- };
- char* cpuset_meta;
- char* cpuset_conf;
- size_t csize;
-
- xcgroup_t acg;
- char* acg_name;
- char* p;
-
- fstatus = XCGROUP_ERROR;
-
- /* load ancestor cg */
- acg_name = (char*) xstrdup(cg->name);
- p = rindex(acg_name,'/');
- if (p == NULL) {
- debug2("task/cgroup: unable to get ancestor path for "
- "cpuset cg '%s' : %m",cg->path);
- return fstatus;
- } else
- *p = '\0';
- if (xcgroup_load(cg->ns,&acg,acg_name) != XCGROUP_SUCCESS) {
- debug2("task/cgroup: unable to load ancestor for "
- "cpuset cg '%s' : %m",cg->path);
- return fstatus;
- }
-
- /* inherits ancestor params */
- for (i = 0 ; i < 2 ; i++) {
- cpuset_meta = cpuset_metafiles[i];
- if (xcgroup_get_param(&acg,cpuset_meta,
- &cpuset_conf,&csize)
- != XCGROUP_SUCCESS) {
- debug2("task/cgroup: assuming no cpuset cg "
- "support for '%s'",acg.path);
- xcgroup_destroy(&acg);
- return fstatus;
- }
- if (csize > 0)
- cpuset_conf[csize-1]='\0';
- if (xcgroup_set_param(cg,cpuset_meta,cpuset_conf)
- != XCGROUP_SUCCESS) {
- debug2("task/cgroup: unable to write %s configuration "
- "(%s) for cpuset cg '%s'",cpuset_meta,
- cpuset_conf,cg->path);
- xcgroup_destroy(&acg);
- xfree(cpuset_conf);
- return fstatus;
- }
- xfree(cpuset_conf);
- }
-
- xcgroup_destroy(&acg);
- return XCGROUP_SUCCESS;
-}
diff --git a/src/plugins/task/cgroup/task_cgroup_devices.c b/src/plugins/task/cgroup/task_cgroup_devices.c
index 303bb43..82f802d 100644
--- a/src/plugins/task/cgroup/task_cgroup_devices.c
+++ b/src/plugins/task/cgroup/task_cgroup_devices.c
@@ -123,7 +123,7 @@
if ( slurm_cgroup_conf->cgroup_automount ) {
if ( xcgroup_ns_mount(&devices_ns) ) {
error("task/cgroup: unable to mount devices "
- "namespace");
+ "namespace: %s", slurm_strerror(errno));
goto clean;
}
info("task/cgroup: devices namespace is now mounted");
diff --git a/src/plugins/task/cgroup/task_cgroup_memory.c b/src/plugins/task/cgroup/task_cgroup_memory.c
index ef37a06..56534b2 100644
--- a/src/plugins/task/cgroup/task_cgroup_memory.c
+++ b/src/plugins/task/cgroup/task_cgroup_memory.c
@@ -68,6 +68,9 @@
static xcgroup_t job_memory_cg;
static xcgroup_t step_memory_cg;
+static bool constrain_ram_space;
+static bool constrain_swap_space;
+
static float allowed_ram_space; /* Allowed RAM in percent */
static float allowed_swap_space; /* Allowed Swap percent */
@@ -109,7 +112,7 @@
if (slurm_cgroup_conf->cgroup_automount) {
if (xcgroup_ns_mount(&memory_ns)) {
error("task/cgroup: unable to mount memory "
- "namespace");
+ "namespace: %s", slurm_strerror(errno));
goto clean;
}
info("task/cgroup: memory namespace is now mounted");
@@ -120,7 +123,21 @@
}
}
- allowed_ram_space = slurm_cgroup_conf->allowed_ram_space;
+ constrain_ram_space = slurm_cgroup_conf->constrain_ram_space;
+ constrain_swap_space = slurm_cgroup_conf->constrain_swap_space;
+
+ /*
+ * as the swap space threshold will be configured with a
+ * mem+swp parameter value, if RAM space is not monitored,
+ * set allowed RAM space to 100% of the job requested memory.
+ * It will help to construct the mem+swp value that will be
+ * used for both mem and mem+swp limit during memcg creation.
+ */
+ if ( constrain_ram_space )
+ allowed_ram_space = slurm_cgroup_conf->allowed_ram_space;
+ else
+ allowed_ram_space = 100.0;
+
allowed_swap_space = slurm_cgroup_conf->allowed_swap_space;
if ((totalram = (uint64_t) conf->real_memory_size) == 0)
@@ -131,17 +148,19 @@
max_swap += max_ram;
min_ram_space = slurm_cgroup_conf->min_ram_space * 1024 * 1024;
- debug ("task/cgroup/memory: total:%luM allowed:%.4g%%, swap:%.4g%%, "
- "max:%.4g%%(%luM) max+swap:%.4g%%(%luM) min:%uM",
- (unsigned long) totalram,
- allowed_ram_space,
- allowed_swap_space,
- slurm_cgroup_conf->max_ram_percent,
- (unsigned long) (max_ram/(1024*1024)),
- slurm_cgroup_conf->max_swap_percent,
- (unsigned long) (max_swap/(1024*1024)),
- (unsigned) slurm_cgroup_conf->min_ram_space);
-
+ debug ("task/cgroup/memory: total:%luM allowed:%.4g%%(%s), "
+ "swap:%.4g%%(%s), max:%.4g%%(%luM) max+swap:%.4g%%(%luM) min:%uM",
+ (unsigned long) totalram,
+ allowed_ram_space,
+ constrain_ram_space?"enforced":"permissive",
+ allowed_swap_space,
+ constrain_swap_space?"enforced":"permissive",
+ slurm_cgroup_conf->max_ram_percent,
+ (unsigned long) (max_ram/(1024*1024)),
+ slurm_cgroup_conf->max_swap_percent,
+ (unsigned long) (max_swap/(1024*1024)),
+ (unsigned) slurm_cgroup_conf->min_ram_space);
+
/*
* Warning: OOM Killer must be disabled for slurmstepd
* or it would be destroyed if the application use
@@ -149,12 +168,12 @@
*
* If an env value is already set for slurmstepd
* OOM killer behavior, keep it, otherwise set the
- * -17 value, wich means do not let OOM killer kill it
+ * -1000 value, wich means do not let OOM killer kill it
*
- * FYI, setting "export SLURMSTEPD_OOM_ADJ=-17"
+ * FYI, setting "export SLURMSTEPD_OOM_ADJ=-1000"
* in /etc/sysconfig/slurm would be the same
*/
- setenv("SLURMSTEPD_OOM_ADJ","-17",0);
+ setenv("SLURMSTEPD_OOM_ADJ", "-1000", 0);
return SLURM_SUCCESS;
@@ -264,14 +283,29 @@
}
xcgroup_set_param (cg, "memory.use_hierarchy","1");
- xcgroup_set_uint64_param (cg, "memory.limit_in_bytes", mlb);
- xcgroup_set_uint64_param (cg, "memory.memsw.limit_in_bytes", mls);
- info ("task/cgroup: %s: alloc=%luMB mem.limit=%luMB memsw.limit=%luMB",
- path,
- (unsigned long) mem_limit,
- (unsigned long) mlb/(1024*1024),
- (unsigned long) mls/(1024*1024));
+ /* when RAM space has not to be constrained and we are here, it
+ * means that only Swap space has to be constrained. Thus set
+ * RAM space limit to the mem+swap limit too */
+ if ( ! constrain_ram_space )
+ mlb = mls;
+ xcgroup_set_uint64_param (cg, "memory.limit_in_bytes", mlb);
+
+ /* this limit has to be set only if ConstrainSwapSpace is set to yes */
+ if ( constrain_swap_space ) {
+ xcgroup_set_uint64_param (cg, "memory.memsw.limit_in_bytes",
+ mls);
+ info ("task/cgroup: %s: alloc=%luMB mem.limit=%luMB "
+ "memsw.limit=%luMB", path,
+ (unsigned long) mem_limit,
+ (unsigned long) mlb/(1024*1024),
+ (unsigned long) mls/(1024*1024));
+ } else {
+ info ("task/cgroup: %s: alloc=%luMB mem.limit=%luMB "
+ "memsw.limit=unlimited", path,
+ (unsigned long) mem_limit,
+ (unsigned long) mlb/(1024*1024));
+ }
return 0;
}
diff --git a/src/plugins/task/none/Makefile.in b/src/plugins/task/none/Makefile.in
index 3f62f6e..6f22a60 100644
--- a/src/plugins/task/none/Makefile.in
+++ b/src/plugins/task/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
task_none_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-task_none.la: $(task_none_la_OBJECTS) $(task_none_la_DEPENDENCIES)
+task_none.la: $(task_none_la_OBJECTS) $(task_none_la_DEPENDENCIES) $(EXTRA_task_none_la_DEPENDENCIES)
$(task_none_la_LINK) -rpath $(pkglibdir) $(task_none_la_OBJECTS) $(task_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/topology/3d_torus/Makefile.in b/src/plugins/topology/3d_torus/Makefile.in
index 474e3fd..2538008 100644
--- a/src/plugins/topology/3d_torus/Makefile.in
+++ b/src/plugins/topology/3d_torus/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
topology_3d_torus_la_LIBADD =
@@ -209,6 +215,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -245,6 +252,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -410,7 +418,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-topology_3d_torus.la: $(topology_3d_torus_la_OBJECTS) $(topology_3d_torus_la_DEPENDENCIES)
+topology_3d_torus.la: $(topology_3d_torus_la_OBJECTS) $(topology_3d_torus_la_DEPENDENCIES) $(EXTRA_topology_3d_torus_la_DEPENDENCIES)
$(topology_3d_torus_la_LINK) -rpath $(pkglibdir) $(topology_3d_torus_la_OBJECTS) $(topology_3d_torus_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -549,10 +557,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/topology/Makefile.in b/src/plugins/topology/Makefile.in
index f5d50dc..676ddf5 100644
--- a/src/plugins/topology/Makefile.in
+++ b/src/plugins/topology/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -203,6 +203,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -239,6 +240,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -575,10 +577,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/topology/node_rank/Makefile.in b/src/plugins/topology/node_rank/Makefile.in
index 634473f..f23951b 100644
--- a/src/plugins/topology/node_rank/Makefile.in
+++ b/src/plugins/topology/node_rank/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
topology_node_rank_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-topology_node_rank.la: $(topology_node_rank_la_OBJECTS) $(topology_node_rank_la_DEPENDENCIES)
+topology_node_rank.la: $(topology_node_rank_la_OBJECTS) $(topology_node_rank_la_DEPENDENCIES) $(EXTRA_topology_node_rank_la_DEPENDENCIES)
$(topology_node_rank_la_LINK) -rpath $(pkglibdir) $(topology_node_rank_la_OBJECTS) $(topology_node_rank_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/topology/none/Makefile.in b/src/plugins/topology/none/Makefile.in
index 81cb386..75d6b23 100644
--- a/src/plugins/topology/none/Makefile.in
+++ b/src/plugins/topology/none/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
topology_none_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -404,7 +412,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-topology_none.la: $(topology_none_la_OBJECTS) $(topology_none_la_DEPENDENCIES)
+topology_none.la: $(topology_none_la_OBJECTS) $(topology_none_la_DEPENDENCIES) $(EXTRA_topology_none_la_DEPENDENCIES)
$(topology_none_la_LINK) -rpath $(pkglibdir) $(topology_none_la_OBJECTS) $(topology_none_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -541,10 +549,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/plugins/topology/tree/Makefile.in b/src/plugins/topology/tree/Makefile.in
index b155731..f7b0243 100644
--- a/src/plugins/topology/tree/Makefile.in
+++ b/src/plugins/topology/tree/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -105,6 +105,12 @@
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(pkglibdir)"
LTLIBRARIES = $(pkglib_LTLIBRARIES)
topology_tree_la_LIBADD =
@@ -208,6 +214,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +251,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -405,7 +413,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-topology_tree.la: $(topology_tree_la_OBJECTS) $(topology_tree_la_DEPENDENCIES)
+topology_tree.la: $(topology_tree_la_OBJECTS) $(topology_tree_la_DEPENDENCIES) $(EXTRA_topology_tree_la_DEPENDENCIES)
$(topology_tree_la_LINK) -rpath $(pkglibdir) $(topology_tree_la_OBJECTS) $(topology_tree_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -542,10 +550,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/sacct/Makefile.am b/src/sacct/Makefile.am
index 626265d..d203aaf 100644
--- a/src/sacct/Makefile.am
+++ b/src/sacct/Makefile.am
@@ -12,7 +12,12 @@
sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
noinst_HEADERS = sacct.c
-sacct_SOURCES = sacct.c process.c print.c options.c
+sacct_SOURCES = \
+ options.c \
+ print.c \
+ process.c \
+ sacct.c \
+ sacct.h
force:
$(sacct_LDADD) : force
diff --git a/src/sacct/Makefile.in b/src/sacct/Makefile.in
index 17e513e..16be322 100644
--- a/src/sacct/Makefile.in
+++ b/src/sacct/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -89,8 +89,8 @@
CONFIG_CLEAN_VPATH_FILES =
am__installdirs = "$(DESTDIR)$(bindir)"
PROGRAMS = $(bin_PROGRAMS)
-am_sacct_OBJECTS = sacct.$(OBJEXT) process.$(OBJEXT) print.$(OBJEXT) \
- options.$(OBJEXT)
+am_sacct_OBJECTS = options.$(OBJEXT) print.$(OBJEXT) process.$(OBJEXT) \
+ sacct.$(OBJEXT)
sacct_OBJECTS = $(am_sacct_OBJECTS)
am__DEPENDENCIES_1 =
sacct_DEPENDENCIES = $(top_builddir)/src/db_api/libslurmdb.o \
@@ -194,6 +194,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -230,6 +231,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -323,7 +325,13 @@
sacct_LDADD = $(top_builddir)/src/db_api/libslurmdb.o $(DL_LIBS)
sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
noinst_HEADERS = sacct.c
-sacct_SOURCES = sacct.c process.c print.c options.c
+sacct_SOURCES = \
+ options.c \
+ print.c \
+ process.c \
+ sacct.c \
+ sacct.h
+
all: all-am
.SUFFIXES:
@@ -401,7 +409,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sacct$(EXEEXT): $(sacct_OBJECTS) $(sacct_DEPENDENCIES)
+sacct$(EXEEXT): $(sacct_OBJECTS) $(sacct_DEPENDENCIES) $(EXTRA_sacct_DEPENDENCIES)
@rm -f sacct$(EXEEXT)
$(sacct_LINK) $(sacct_OBJECTS) $(sacct_LDADD) $(LIBS)
@@ -542,10 +550,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/sacct/options.c b/src/sacct/options.c
index 316f67d..a25d31e 100644
--- a/src/sacct/options.c
+++ b/src/sacct/options.c
@@ -44,6 +44,13 @@
#include "sacct.h"
#include <time.h>
+/* getopt_long options, integers but not characters */
+#define OPT_LONG_ALLOCS 0x100
+#define OPT_LONG_DUP 0x101
+#define OPT_LONG_HELP 0x102
+#define OPT_LONG_USAGE 0x103
+#define OPT_LONG_NAME 0x104
+
void _help_fields_msg(void);
void _help_msg(void);
void _usage(void);
@@ -462,6 +469,8 @@
-N, --nodelist: \n\
Display jobs that ran on any of these nodes, \n\
can be one or more using a ranged string. \n\
+ --name: \n\
+ Display jobs that have any of these name(s). \n\
-o, --format: \n\
Comma separated list of fields. (use \"--helpformat\" \n\
for a list of available fields). \n\
@@ -604,50 +613,51 @@
bool set;
static struct option long_options[] = {
- {"allusers", 0,0, 'a'},
- {"accounts", 1, 0, 'A'},
- {"allocations", 0, ¶ms.opt_allocs, 1},
- {"brief", 0, 0, 'b'},
- {"completion", 0, ¶ms.opt_completion, 'c'},
- {"dump", 0, 0, 'd'},
- {"duplicates", 0, ¶ms.opt_dup, 1},
- {"helpformat", 0, 0, 'e'},
- {"help-fields", 0, 0, 'e'},
- {"endtime", 1, 0, 'E'},
- {"file", 1, 0, 'f'},
- {"gid", 1, 0, 'g'},
- {"group", 1, 0, 'g'},
- {"help", 0, 0, 'h'},
- {"helpformat", 0, ¶ms.opt_help, 2},
- {"nnodes", 1, 0, 'i'},
- {"ncpus", 1, 0, 'I'},
- {"jobs", 1, 0, 'j'},
- {"timelimit-min", 1, 0, 'k'},
- {"timelimit-max", 1, 0, 'K'},
- {"long", 0, 0, 'l'},
- {"allclusters", 0,0, 'L'},
- {"cluster", 1, 0, 'M'},
- {"clusters", 1, 0, 'M'},
- {"nodelist", 1, 0, 'N'},
- {"noheader", 0, 0, 'n'},
- {"fields", 1, 0, 'o'},
- {"format", 1, 0, 'o'},
- {"formatted_dump", 0, 0, 'O'},
- {"parsable", 0, 0, 'p'},
- {"parsable2", 0, 0, 'P'},
- {"qos", 1, 0, 'q'},
- {"partition", 1, 0, 'r'},
- {"state", 1, 0, 's'},
- {"starttime", 1, 0, 'S'},
- {"truncate", 0, 0, 'T'},
- {"uid", 1, 0, 'u'},
- {"usage", 0, ¶ms.opt_help, 3},
- {"user", 1, 0, 'u'},
- {"verbose", 0, 0, 'v'},
- {"version", 0, 0, 'V'},
- {"wckeys", 1, 0, 'W'},
- {"associations", 1, 0, 'x'},
- {0, 0, 0, 0}};
+ {"allusers", no_argument, 0, 'a'},
+ {"accounts", required_argument, 0, 'A'},
+ {"allocations", no_argument, ¶ms.opt_allocs, OPT_LONG_ALLOCS},
+ {"brief", no_argument, 0, 'b'},
+ {"completion", no_argument, ¶ms.opt_completion, 'c'},
+ {"dump", no_argument, 0, 'd'},
+ {"duplicates", no_argument, ¶ms.opt_dup, OPT_LONG_DUP},
+ {"helpformat", no_argument, 0, 'e'},
+ {"help-fields", no_argument, 0, 'e'},
+ {"endtime", required_argument, 0, 'E'},
+ {"file", required_argument, 0, 'f'},
+ {"gid", required_argument, 0, 'g'},
+ {"group", required_argument, 0, 'g'},
+ {"help", no_argument, 0, 'h'},
+ {"helpformat", no_argument, ¶ms.opt_help, OPT_LONG_HELP},
+ {"name", required_argument, 0, OPT_LONG_NAME},
+ {"nnodes", required_argument, 0, 'i'},
+ {"ncpus", required_argument, 0, 'I'},
+ {"jobs", required_argument, 0, 'j'},
+ {"timelimit-min", required_argument, 0, 'k'},
+ {"timelimit-max", required_argument, 0, 'K'},
+ {"long", no_argument, 0, 'l'},
+ {"allclusters", no_argument, 0, 'L'},
+ {"cluster", required_argument, 0, 'M'},
+ {"clusters", required_argument, 0, 'M'},
+ {"nodelist", required_argument, 0, 'N'},
+ {"noheader", no_argument, 0, 'n'},
+ {"fields", required_argument, 0, 'o'},
+ {"format", required_argument, 0, 'o'},
+ {"formatted_dump", no_argument, 0, 'O'},
+ {"parsable", no_argument, 0, 'p'},
+ {"parsable2", no_argument, 0, 'P'},
+ {"qos", required_argument, 0, 'q'},
+ {"partition", required_argument, 0, 'r'},
+ {"state", required_argument, 0, 's'},
+ {"starttime", required_argument, 0, 'S'},
+ {"truncate", no_argument, 0, 'T'},
+ {"uid", required_argument, 0, 'u'},
+ {"usage", no_argument, ¶ms.opt_help, OPT_LONG_USAGE},
+ {"user", required_argument, 0, 'u'},
+ {"verbose", no_argument, 0, 'v'},
+ {"version", no_argument, 0, 'V'},
+ {"wckeys", required_argument, 0, 'W'},
+ {"associations", required_argument, 0, 'x'},
+ {0, 0, 0, 0}};
params.opt_uid = getuid();
params.opt_gid = getgid();
@@ -791,6 +801,12 @@
}
job_cond->used_nodes = xstrdup(optarg);
break;
+ case OPT_LONG_NAME:
+ if(!job_cond->jobname_list)
+ job_cond->jobname_list =
+ list_create(slurm_destroy_char);
+ slurm_addto_char_list(job_cond->jobname_list, optarg);
+ break;
case 'o':
xstrfmtcat(params.opt_field_list, "%s,", optarg);
break;
@@ -806,9 +822,14 @@
PRINT_FIELDS_PARSABLE_NO_ENDING;
break;
case 'q':
- if(!g_qos_list)
+ if (!g_qos_list) {
+ slurmdb_qos_cond_t qos_cond;
+ memset(&qos_cond, 0,
+ sizeof(slurmdb_qos_cond_t));
+ qos_cond.with_deleted = 1;
g_qos_list = slurmdb_qos_get(
- acct_db_conn, NULL);
+ acct_db_conn, &qos_cond);
+ }
if(!job_cond->qos_list)
job_cond->qos_list =
@@ -1109,6 +1130,16 @@
debug2("Timelimit requested\t: %s", time_str);
}
+ /* specific jobnames requested? */
+ if (job_cond->jobname_list && list_count(job_cond->jobname_list)) {
+ debug2("Jobnames requested:");
+ itr = list_iterator_create(job_cond->jobname_list);
+ while((start = list_next(itr))) {
+ debug2("\t: %s", start);
+ }
+ list_iterator_destroy(itr);
+ }
+
/* select the output fields */
if(brief_output) {
if(params.opt_completion)
diff --git a/src/sacct/print.c b/src/sacct/print.c
index 03ac3ae..af1b224 100644
--- a/src/sacct/print.c
+++ b/src/sacct/print.c
@@ -959,9 +959,14 @@
break;
}
- if(!g_qos_list)
+ if (!g_qos_list) {
+ slurmdb_qos_cond_t qos_cond;
+ memset(&qos_cond, 0,
+ sizeof(slurmdb_qos_cond_t));
+ qos_cond.with_deleted = 1;
g_qos_list = slurmdb_qos_get(
- acct_db_conn, NULL);
+ acct_db_conn, &qos_cond);
+ }
tmp_char = _find_qos_name_from_list(g_qos_list,
tmp_int);
diff --git a/src/sacct/sacct.c b/src/sacct/sacct.c
index 6d5b980..aaeef5f 100644
--- a/src/sacct/sacct.c
+++ b/src/sacct/sacct.c
@@ -44,9 +44,9 @@
*/
sacct_parameters_t params;
print_field_t fields[] = {
- {10, "AllocCPUS", print_fields_int, PRINT_ALLOC_CPUS},
+ {10, "AllocCPUS", print_fields_uint, PRINT_ALLOC_CPUS},
{10, "Account", print_fields_str, PRINT_ACCOUNT},
- {7, "AssocID", print_fields_int, PRINT_ASSOCID},
+ {7, "AssocID", print_fields_uint, PRINT_ASSOCID},
{10, "AveCPU", print_fields_str, PRINT_AVECPU},
{10, "AvePages", print_fields_str, PRINT_AVEPAGES},
{10, "AveRSS", print_fields_str, PRINT_AVERSS},
@@ -61,35 +61,35 @@
{19, "Eligible", print_fields_date, PRINT_ELIGIBLE},
{19, "End", print_fields_date, PRINT_END},
{8, "ExitCode", print_fields_str, PRINT_EXITCODE},
- {6, "GID", print_fields_int, PRINT_GID},
+ {6, "GID", print_fields_uint, PRINT_GID},
{9, "Group", print_fields_str, PRINT_GROUP},
{-12, "JobID", print_fields_str, PRINT_JOBID},
{10, "JobName", print_fields_str, PRINT_JOBNAME},
{9, "Layout", print_fields_str, PRINT_LAYOUT},
{8, "MaxPages", print_fields_str, PRINT_MAXPAGES},
{12, "MaxPagesNode", print_fields_str, PRINT_MAXPAGESNODE},
- {14, "MaxPagesTask", print_fields_int, PRINT_MAXPAGESTASK},
+ {14, "MaxPagesTask", print_fields_uint, PRINT_MAXPAGESTASK},
{10, "MaxRSS", print_fields_str, PRINT_MAXRSS},
{10, "MaxRSSNode", print_fields_str, PRINT_MAXRSSNODE},
- {10, "MaxRSSTask", print_fields_int, PRINT_MAXRSSTASK},
+ {10, "MaxRSSTask", print_fields_uint, PRINT_MAXRSSTASK},
{10, "MaxVMSize", print_fields_str, PRINT_MAXVSIZE},
{14, "MaxVMSizeNode", print_fields_str, PRINT_MAXVSIZENODE},
- {14, "MaxVMSizeTask", print_fields_int, PRINT_MAXVSIZETASK},
+ {14, "MaxVMSizeTask", print_fields_uint, PRINT_MAXVSIZETASK},
{10, "MinCPU", print_fields_str, PRINT_MINCPU},
{10, "MinCPUNode", print_fields_str, PRINT_MINCPUNODE},
- {10, "MinCPUTask", print_fields_int, PRINT_MINCPUTASK},
- {10, "NCPUS", print_fields_int, PRINT_ALLOC_CPUS},
+ {10, "MinCPUTask", print_fields_uint, PRINT_MINCPUTASK},
+ {10, "NCPUS", print_fields_uint, PRINT_ALLOC_CPUS},
{8, "NNodes", print_fields_str, PRINT_NNODES},
{15, "NodeList", print_fields_str, PRINT_NODELIST},
- {8, "NTasks", print_fields_int, PRINT_NTASKS},
- {10, "Priority", print_fields_int, PRINT_PRIO},
+ {8, "NTasks", print_fields_uint, PRINT_NTASKS},
+ {10, "Priority", print_fields_uint, PRINT_PRIO},
{10, "Partition", print_fields_str, PRINT_PARTITION},
{10, "QOS", print_fields_str, PRINT_QOS},
- {6, "QOSRAW", print_fields_int, PRINT_QOSRAW},
- {8, "ReqCPUS", print_fields_int, PRINT_REQ_CPUS},
+ {6, "QOSRAW", print_fields_uint, PRINT_QOSRAW},
+ {8, "ReqCPUS", print_fields_uint, PRINT_REQ_CPUS},
{10, "Reserved", print_fields_time_from_secs, PRINT_RESV},
{10, "ResvCPU", print_fields_time_from_secs, PRINT_RESV_CPU},
- {10, "ResvCPURAW", print_fields_int, PRINT_RESV_CPU},
+ {10, "ResvCPURAW", print_fields_uint, PRINT_RESV_CPU},
{19, "Start", print_fields_date, PRINT_START},
{10, "State", print_fields_str, PRINT_STATE},
{19, "Submit", print_fields_date, PRINT_SUBMIT},
@@ -97,11 +97,11 @@
{10, "SystemCPU", print_fields_str, PRINT_SYSTEMCPU},
{10, "Timelimit", print_fields_str, PRINT_TIMELIMIT},
{10, "TotalCPU", print_fields_str, PRINT_TOTALCPU},
- {6, "UID", print_fields_int, PRINT_UID},
+ {6, "UID", print_fields_uint, PRINT_UID},
{9, "User", print_fields_str, PRINT_USER},
{10, "UserCPU", print_fields_str, PRINT_USERCPU},
{10, "WCKey", print_fields_str, PRINT_WCKEY},
- {10, "WCKeyID", print_fields_int, PRINT_WCKEYID},
+ {10, "WCKeyID", print_fields_uint, PRINT_WCKEYID},
{0, NULL, NULL, 0}};
List jobs = NULL;
diff --git a/src/sacctmgr/Makefile.in b/src/sacctmgr/Makefile.in
index a83569c..50136fa 100644
--- a/src/sacctmgr/Makefile.in
+++ b/src/sacctmgr/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -197,6 +197,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -233,6 +234,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -421,7 +423,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sacctmgr$(EXEEXT): $(sacctmgr_OBJECTS) $(sacctmgr_DEPENDENCIES)
+sacctmgr$(EXEEXT): $(sacctmgr_OBJECTS) $(sacctmgr_DEPENDENCIES) $(EXTRA_sacctmgr_DEPENDENCIES)
@rm -f sacctmgr$(EXEEXT)
$(sacctmgr_LINK) $(sacctmgr_OBJECTS) $(sacctmgr_LDADD) $(LIBS)
@@ -573,10 +575,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c
index 7f0a65b..6bb1252 100644
--- a/src/sacctmgr/account_functions.c
+++ b/src/sacctmgr/account_functions.c
@@ -602,6 +602,7 @@
assoc->grp_cpu_mins = start_assoc->grp_cpu_mins;
assoc->grp_cpus = start_assoc->grp_cpus;
assoc->grp_jobs = start_assoc->grp_jobs;
+ assoc->grp_mem = start_assoc->grp_mem;
assoc->grp_nodes = start_assoc->grp_nodes;
assoc->grp_submit_jobs = start_assoc->grp_submit_jobs;
assoc->grp_wall = start_assoc->grp_wall;
@@ -755,7 +756,7 @@
if(acct_cond->with_assocs)
slurm_addto_char_list(format_list,
"Cl,ParentN,U,Share,GrpJ,GrpN,"
- "GrpCPUs,GrpS,GrpWall,GrpCPUMins,"
+ "GrpCPUs,GrpMEM,GrpS,GrpWall,GrpCPUMins,"
"MaxJ,MaxN,MaxCPUs,MaxS,MaxW,"
"MaxCPUMins,QOS,DefaultQOS");
diff --git a/src/sacctmgr/association_functions.c b/src/sacctmgr/association_functions.c
index c0fd5f7..b5e9600 100644
--- a/src/sacctmgr/association_functions.c
+++ b/src/sacctmgr/association_functions.c
@@ -319,6 +319,13 @@
if (slurm_addto_char_list(assoc_cond->grp_jobs_list,
value))
set = 1;
+ } else if (!strncasecmp (type, "GrpMemory", MAX(command_len, 4))) {
+ if (!assoc_cond->grp_mem_list)
+ assoc_cond->grp_mem_list =
+ list_create(slurm_destroy_char);
+ if (slurm_addto_char_list(assoc_cond->grp_mem_list,
+ value))
+ set = 1;
} else if (!strncasecmp (type, "GrpNodes", MAX(command_len, 4))) {
if (!assoc_cond->grp_nodes_list)
assoc_cond->grp_nodes_list =
@@ -474,6 +481,10 @@
if (get_uint(value, &assoc->grp_jobs,
"GrpJobs") == SLURM_SUCCESS)
set = 1;
+ } else if (!strncasecmp(type, "GrpMemory", MAX(command_len, 4))) {
+ if (get_uint(value, &assoc->grp_mem,
+ "GrpMemory") == SLURM_SUCCESS)
+ set = 1;
} else if (!strncasecmp(type, "GrpNodes", MAX(command_len, 4))) {
if (get_uint(value, &assoc->grp_nodes,
"GrpNodes") == SLURM_SUCCESS)
@@ -613,6 +624,9 @@
case PRINT_GRPJ:
field->print_routine(field, assoc->grp_jobs, last);
break;
+ case PRINT_GRPMEM:
+ field->print_routine(field, assoc->grp_mem, last);
+ break;
case PRINT_GRPN:
field->print_routine(field, assoc->grp_nodes, last);
break;
@@ -716,7 +730,7 @@
slurm_addto_char_list(format_list, "Cluster,Account,User,Part");
if (!assoc_cond->without_parent_limits)
slurm_addto_char_list(format_list,
- "Share,GrpJ,GrpN,GrpCPUs,"
+ "Share,GrpJ,GrpN,GrpCPUs,GrpMEM,"
"GrpS,GrpWall,GrpCPUMins,MaxJ,"
"MaxN,MaxCPUs,MaxS,MaxW,"
"MaxCPUMins,QOS,DefaultQOS");
diff --git a/src/sacctmgr/cluster_functions.c b/src/sacctmgr/cluster_functions.c
index c558fb8..26ded4d 100644
--- a/src/sacctmgr/cluster_functions.c
+++ b/src/sacctmgr/cluster_functions.c
@@ -321,6 +321,7 @@
cluster->root_assoc->grp_cpus = start_assoc.grp_cpus;
cluster->root_assoc->grp_jobs = start_assoc.grp_jobs;
+ cluster->root_assoc->grp_mem = start_assoc.grp_mem;
cluster->root_assoc->grp_nodes = start_assoc.grp_nodes;
cluster->root_assoc->grp_submit_jobs =
start_assoc.grp_submit_jobs;
@@ -535,6 +536,11 @@
assoc->grp_jobs,
(curr_inx == field_count));
break;
+ case PRINT_GRPMEM:
+ field->print_routine(field,
+ assoc->grp_mem,
+ (curr_inx == field_count));
+ break;
case PRINT_GRPN:
field->print_routine(field,
assoc->grp_nodes,
diff --git a/src/sacctmgr/common.c b/src/sacctmgr/common.c
index b9c7490..9d4de71 100644
--- a/src/sacctmgr/common.c
+++ b/src/sacctmgr/common.c
@@ -298,6 +298,11 @@
field->name = xstrdup("GrpJobs");
field->len = 7;
field->print_routine = print_fields_uint;
+ } else if (!strncasecmp("GrpMemory", object, MAX(command_len, 4))) {
+ field->type = PRINT_GRPMEM;
+ field->name = xstrdup("GrpMem");
+ field->len = 7;
+ field->print_routine = print_fields_uint;
} else if (!strncasecmp("GrpNodes", object, MAX(command_len, 4))) {
field->type = PRINT_GRPN;
field->name = xstrdup("GrpNodes");
@@ -334,10 +339,11 @@
field->name = xstrdup("MaxCPUMins");
field->len = 11;
field->print_routine = print_fields_uint64;
- } else if (!strncasecmp("MaxCPURunMins", object, MAX(command_len, 7))) {
+ } else if (!strncasecmp("MaxCPURunMinsPerUser",
+ object, MAX(command_len, 7))) {
field->type = PRINT_MAXCRM;
- field->name = xstrdup("MaxCPURunMins");
- field->len = 13;
+ field->name = xstrdup("MaxCPURunMinsPU");
+ field->len = 15;
field->print_routine = print_fields_uint64;
} else if (!strncasecmp("MaxCPUsPerJob", object, MAX(command_len, 7))) {
field->type = PRINT_MAXC;
@@ -347,14 +353,20 @@
} else if (!strncasecmp("MaxCPUsPerUser", object,
MAX(command_len, 11))) {
field->type = PRINT_MAXCU;
- field->name = xstrdup("MaxCPUsPerUser");
- field->len = 14;
+ field->name = xstrdup("MaxCPUsPU");
+ field->len = 9;
field->print_routine = print_fields_uint;
} else if (!strncasecmp("MaxJobs", object, MAX(command_len, 4))) {
field->type = PRINT_MAXJ;
field->name = xstrdup("MaxJobs");
field->len = 7;
field->print_routine = print_fields_uint;
+ } else if (!strncasecmp("MaxJobsPerUser",
+ object, MAX(command_len, 8))) {
+ field->type = PRINT_MAXJ; /* used same as MaxJobs */
+ field->name = xstrdup("MaxJobsPU");
+ field->len = 9;
+ field->print_routine = print_fields_uint;
} else if (!strncasecmp("MaxNodesPerJob", object,
MAX(command_len, 4))) {
field->type = PRINT_MAXN;
@@ -364,14 +376,20 @@
} else if (!strncasecmp("MaxNodesPerUser", object,
MAX(command_len, 12))) {
field->type = PRINT_MAXNU;
- field->name = xstrdup("MaxNodesPerUser");
- field->len = 15;
+ field->name = xstrdup("MaxNodesPU");
+ field->len = 10;
field->print_routine = print_fields_uint;
} else if (!strncasecmp("MaxSubmitJobs", object, MAX(command_len, 4))) {
field->type = PRINT_MAXS;
field->name = xstrdup("MaxSubmit");
field->len = 9;
field->print_routine = print_fields_uint;
+ } else if (!strncasecmp("MaxSubmitJobsPerUser",
+ object, MAX(command_len, 10))) {
+ field->type = PRINT_MAXS; /* used same as MaxSubmitJobs */
+ field->name = xstrdup("MaxSubmitPU");
+ field->len = 11;
+ field->print_routine = print_fields_uint;
} else if (!strncasecmp("MaxWallDurationPerJob", object,
MAX(command_len, 4))) {
field->type = PRINT_MAXW;
@@ -434,7 +452,7 @@
field->type = PRINT_PRIO;
field->name = xstrdup("Priority");
field->len = 10;
- field->print_routine = print_fields_int;
+ field->print_routine = print_fields_uint;
} else if (!strncasecmp("Problem", object, MAX(command_len, 1))) {
field->type = PRINT_PROBLEM;
field->name = xstrdup("Problem");
@@ -1333,6 +1351,11 @@
else if (assoc->grp_jobs != NO_VAL)
printf(" GrpJobs = %u\n", assoc->grp_jobs);
+ if (assoc->grp_mem == INFINITE)
+ printf(" GrpMemory = NONE\n");
+ else if (assoc->grp_mem != NO_VAL)
+ printf(" GrpMemory = %u\n", assoc->grp_mem);
+
if (assoc->grp_nodes == INFINITE)
printf(" GrpNodes = NONE\n");
else if (assoc->grp_nodes != NO_VAL)
@@ -1431,6 +1454,11 @@
else if (qos->grp_jobs != NO_VAL)
printf(" GrpJobs = %u\n", qos->grp_jobs);
+ if (qos->grp_mem == INFINITE)
+ printf(" GrpMemory = NONE\n");
+ else if (qos->grp_mem != NO_VAL)
+ printf(" GrpMemory = %u\n", qos->grp_mem);
+
if (qos->grp_nodes == INFINITE)
printf(" GrpNodes = NONE\n");
else if (qos->grp_nodes != NO_VAL)
diff --git a/src/sacctmgr/file_functions.c b/src/sacctmgr/file_functions.c
index 0cf44e7..9aa00a1 100644
--- a/src/sacctmgr/file_functions.c
+++ b/src/sacctmgr/file_functions.c
@@ -54,6 +54,7 @@
uint64_t grp_cpu_mins;
uint32_t grp_cpus;
uint32_t grp_jobs;
+ uint32_t grp_mem;
uint32_t grp_nodes;
uint32_t grp_submit_jobs;
uint32_t grp_wall;
@@ -94,6 +95,7 @@
file_opts->grp_cpu_mins = (uint64_t)NO_VAL;
file_opts->grp_cpus = NO_VAL;
file_opts->grp_jobs = NO_VAL;
+ file_opts->grp_mem = NO_VAL;
file_opts->grp_nodes = NO_VAL;
file_opts->grp_submit_jobs = NO_VAL;
file_opts->grp_wall = NO_VAL;
@@ -376,6 +378,16 @@
_destroy_sacctmgr_file_opts(file_opts);
break;
}
+ } else if (!strncasecmp (sub, "GrpMemory",
+ MAX(command_len, 4))) {
+ if (get_uint(option, &file_opts->grp_mem,
+ "GrpMemory") != SLURM_SUCCESS) {
+ exit_code=1;
+ fprintf(stderr,
+ " Bad GrpMemory value: %s\n", option);
+ _destroy_sacctmgr_file_opts(file_opts);
+ break;
+ }
} else if (!strncasecmp (sub, "GrpNodes",
MAX(command_len, 4))) {
if (get_uint(option, &file_opts->grp_nodes,
@@ -558,8 +570,8 @@
slurm_addto_char_list(format_list,
"Account,ParentName");
slurm_addto_char_list(format_list,
- "Share,GrpCPUM,GrpCPUs,"
- "GrpJ,GrpN,GrpS,GrpW,MaxCPUM,MaxCPUs,"
+ "Share,GrpCPUM,GrpCPUs,GrpJ,"
+ "GrpMEM,GrpN,GrpS,GrpW,MaxCPUM,MaxCPUs,"
"MaxJ,MaxS,MaxN,MaxW,QOS,DefaultQOS");
print_fields_list = sacctmgr_process_format_list(format_list);
@@ -607,6 +619,10 @@
field->print_routine(field,
assoc->grp_jobs);
break;
+ case PRINT_GRPMEM:
+ field->print_routine(field,
+ assoc->grp_mem);
+ break;
case PRINT_GRPN:
field->print_routine(field,
assoc->grp_nodes);
@@ -769,6 +785,18 @@
file_opts->grp_jobs);
}
+ if((file_opts->grp_mem != NO_VAL)
+ && (assoc->grp_mem != file_opts->grp_mem)) {
+ mod_assoc.grp_mem = file_opts->grp_mem;
+ changed = 1;
+ xstrfmtcat(my_info,
+ "%-30.30s for %-7.7s %-10.10s %8d -> %d\n",
+ " Changed GrpMemory",
+ type, name,
+ assoc->grp_mem,
+ file_opts->grp_mem);
+ }
+
if ((file_opts->grp_nodes != NO_VAL)
&& (assoc->grp_nodes != file_opts->grp_nodes)) {
mod_assoc.grp_nodes = file_opts->grp_nodes;
@@ -1253,8 +1281,8 @@
int first = 1;
notice_thread_init();
(void) acct_storage_g_add_coord(db_conn, my_uid,
- file_opts->coord_list,
- &user_cond);
+ file_opts->coord_list,
+ &user_cond);
notice_thread_fini();
user->coord_accts = list_create(slurmdb_destroy_coord_rec);
@@ -1494,7 +1522,7 @@
else
acct->organization = xstrdup(file_opts->name);
/* info("adding account %s (%s) (%s)", */
-/* acct->name, acct->description, */
+/* acct->name, acct->description, */
/* acct->organization); */
return acct;
@@ -1551,6 +1579,7 @@
assoc->grp_cpu_mins = file_opts->grp_cpu_mins;
assoc->grp_cpus = file_opts->grp_cpus;
assoc->grp_jobs = file_opts->grp_jobs;
+ assoc->grp_mem = file_opts->grp_mem;
assoc->grp_nodes = file_opts->grp_nodes;
assoc->grp_submit_jobs = file_opts->grp_submit_jobs;
assoc->grp_wall = file_opts->grp_wall;
@@ -1729,6 +1758,9 @@
if (assoc->grp_jobs != INFINITE)
xstrfmtcat(*line, ":GrpJobs=%u", assoc->grp_jobs);
+ if(assoc->grp_mem != INFINITE)
+ xstrfmtcat(*line, ":GrpMemory=%u", assoc->grp_mem);
+
if (assoc->grp_nodes != INFINITE)
xstrfmtcat(*line, ":GrpNodes=%u", assoc->grp_nodes);
diff --git a/src/sacctmgr/qos_functions.c b/src/sacctmgr/qos_functions.c
index 90cda4d..7c196d1 100644
--- a/src/sacctmgr/qos_functions.c
+++ b/src/sacctmgr/qos_functions.c
@@ -203,7 +203,8 @@
" Bad Preempt Mode given: %s\n",
argv[i]);
exit_code = 1;
- } else if (qos_cond->preempt_mode == PREEMPT_MODE_SUSPEND) {
+ } else if (qos_cond->preempt_mode ==
+ PREEMPT_MODE_SUSPEND) {
printf("PreemptType and PreemptMode "
"values incompatible\n");
exit_code = 1;
@@ -316,6 +317,13 @@
if (get_uint(argv[i]+end, &qos->grp_jobs,
"GrpJobs") == SLURM_SUCCESS)
set = 1;
+ } else if (!strncasecmp (argv[i], "GrpMemory",
+ MAX(command_len, 4))) {
+ if(!qos)
+ continue;
+ if (get_uint(argv[i]+end, &qos->grp_mem,
+ "GrpMemory") == SLURM_SUCCESS)
+ set = 1;
} else if (!strncasecmp (argv[i], "GrpNodes",
MAX(command_len, 4))) {
if(!qos)
@@ -622,11 +630,14 @@
qos->grp_cpu_mins = start_qos->grp_cpu_mins;
qos->grp_cpus = start_qos->grp_cpus;
qos->grp_jobs = start_qos->grp_jobs;
+ qos->grp_mem = start_qos->grp_mem;
qos->grp_nodes = start_qos->grp_nodes;
qos->grp_submit_jobs = start_qos->grp_submit_jobs;
qos->grp_wall = start_qos->grp_wall;
qos->max_cpu_mins_pj = start_qos->max_cpu_mins_pj;
+ qos->max_cpu_run_mins_pu =
+ start_qos->max_cpu_run_mins_pu;
qos->max_cpus_pj = start_qos->max_cpus_pj;
qos->max_cpus_pu = start_qos->max_cpus_pu;
qos->max_jobs_pu = start_qos->max_jobs_pu;
@@ -729,9 +740,13 @@
} else if(!list_count(format_list)) {
slurm_addto_char_list(format_list,
"Name,Prio,GraceT,Preempt,PreemptM,"
- "Flags%40,UsageThres,GrpCPUs,GrpCPUMins,"
- "GrpJ,GrpN,GrpS,GrpW,"
- "MaxCPUs,MaxCPUMins,MaxJ,MaxN,MaxS,MaxW");
+ "Flags%40,UsageThres,UsageFactor,"
+ "GrpCPUs,GrpCPUMins,GrpCPURunMins,"
+ "GrpJ,GrpMEM,GrpN,GrpS,GrpW,"
+ "MaxCPUs,MaxCPUMins,MaxN,MaxW,"
+ "MaxCPUsPerUser,"
+ "MaxJobsPerUser,MaxNodesPerUser,"
+ "MaxSubmitJobsPerUser");
}
print_fields_list = sacctmgr_process_format_list(format_list);
@@ -808,6 +823,11 @@
qos->grp_jobs,
(curr_inx == field_count));
break;
+ case PRINT_GRPMEM:
+ field->print_routine(field,
+ qos->grp_mem,
+ (curr_inx == field_count));
+ break;
case PRINT_GRPN:
field->print_routine(field,
qos->grp_nodes,
@@ -835,6 +855,12 @@
qos->max_cpu_mins_pj,
(curr_inx == field_count));
break;
+ case PRINT_MAXCRM:
+ field->print_routine(
+ field,
+ qos->max_cpu_run_mins_pu,
+ (curr_inx == field_count));
+ break;
case PRINT_MAXC:
field->print_routine(field,
qos->max_cpus_pj,
diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c
index 08f69ff..6abecce 100644
--- a/src/sacctmgr/sacctmgr.c
+++ b/src/sacctmgr/sacctmgr.c
@@ -816,14 +816,16 @@
WithDeleted, WithCoordinators, WithRawQOS, \n\
and WOPLimits \n\
add account - Clusters=, DefaultQOS=, Description=, Fairshare=,\n\
- GrpCPUMins=, GrpCPUs=, GrpJobs=, GrpNodes=, \n\
- GrpSubmitJob=, GrpWall=, MaxCPUMins=, MaxCPUs=,\n\
- MaxJobs=, MaxNodes=, MaxSubmitJobs=, MaxWall=, \n\
- Names=, Organization=, Parent=, and QosLevel= \n\
+ GrpCPUMins=, GrpCPUs=, GrpJobs=, GrpMemory=, \n\
+ GrpNodes=, GrpSubmitJob=, GrpWall=, MaxCPUMins=,\n\
+ MaxCPUs=, MaxJobs=, MaxNodes=, MaxSubmitJobs=, \n\
+ MaxWall=, Names=, Organization=, Parent=, \n\
+ and QosLevel= \n\
modify account - (set options) DefaultQOS=, Description=, \n\
Fairshare=, GrpCPUMins=, GrpCPURunMins=, \n\
- GrpCPUs=, GrpJobs=, GrpNodes=, GrpSubmitJob=, \n\
- GrpWall=, MaxCPUMins=, MaxCPURunMins=, MaxCPUs=,\n\
+ GrpCPUs=, GrpJobs=, GrpMemory=, GrpNodes=, \n\
+ GrpSubmitJob=, GrpWall=, MaxCPUMins=, \n\
+ MaxCPURunMins=, MaxCPUs=, \n\
MaxJobs=, MaxNodes=, MaxSubmitJobs=, MaxWall=, \n\
Names=, Organization=, Parent=, and QosLevel= \n\
RawUsage= (with admin privileges only) \n\
@@ -841,11 +843,11 @@
list cluster - Classification=, DefaultQOS=, Flags=, Format=, \n\
Names=, RPC=, and WOLimits \n\
add cluster - DefaultQOS=, Fairshare=, GrpCPUs=, GrpJobs=, \n\
- GrpNodes=, GrpSubmitJob=, MaxCPUMins= \n\
+ GrpMemory=, GrpNodes=, GrpSubmitJob=, MaxCPUMins=,\n\
MaxJobs=, MaxNodes=, MaxSubmitJobs=, MaxWall=, \n\
Name=, and QosLevel= \n\
- modify cluster - (set options) DefaultQOS=, Fairshare=, \n\
- GrpCPUs=, GrpJobs=, GrpNodes=, GrpSubmitJob=, \n\
+ modify cluster - (set options) DefaultQOS=, Fairshare=, GrpCPUs=,\n\
+ GrpJobs=, GrpMemory=, GrpNodes=, GrpSubmitJob=, \n\
MaxCPUMins=, MaxJobs=, MaxNodes=, MaxSubmitJobs=,\n\
MaxWall=, and QosLevel= \n\
(where options) Classification=, Flags=, \n\
@@ -865,15 +867,15 @@
list qos - Descriptions=, Format=, Id=, Names=, \n\
PreemptMode=, and WithDeleted \n\
add qos - Description=, Flags=, GraceTime=, GrpCPUMins=, \n\
- GGrpCPUs=, GrpJobs=, GrpNodes=, GrpSubmitJob=, \n\
- GrpWall=,MaxCPUMins=, MaxCPUs=, MaxCPUsPerUser=,\n\
- MaxJobs=, MaxNodesPerUser=, MaxCPUsPerUser=, \n\
- MaxNodes=, MaxSubmitJobs=, MaxWall=, Names=, \n\
- Preempt=, PreemptMode=, Priority=, \n\
- UsageFactor=, and UsageThreshold= \n\
+ GGrpCPUs=, GrpJobs=, GrpMemory=, GrpNodes=, \n\
+ GrpSubmitJob=, GrpWall=, MaxCPUMins=, MaxCPUs=,\n\
+ MaxCPUsPerUser=, MaxJobs=, MaxNodesPerUser=, \n\
+ MaxCPUsPerUser=, MaxNodes=, MaxSubmitJobs=, \n\
+ MaxWall=, Names=, Preempt=, PreemptMode=, \n\
+ Priority=, UsageFactor=, and UsageThreshold= \n\
modify qos - (set options) Description=, Flags=, GraceTime=,\n\
- GrpCPUMins=, GrpCPURunMins=, GrpCPUs=, \n\
- GrpJobs=, GrpNodes=, GrpSubmitJob=, GrpWall=, \n\
+ GrpCPUMins=, GrpCPURunMins=, GrpCPUs=, GrpJobs=,\n\
+ GrpMemory=, GrpNodes=, GrpSubmitJob=, GrpWall=,\n\
MaxCPUMins=, MaxCPURunMins=, MaxCPUs=, \n\
MaxCPUsPerUser=, MaxJobs=, MaxNodes=, \n\
MaxNodesPerUser=, MaxSubmitJobs=, MaxWall=, \n\
@@ -926,15 +928,16 @@
\n\
Association - Account, Cluster, DefaultQOS, Fairshare, \n\
GrpCPUMins, GrpCPURunMins, GrpCPUs, GrpJobs, \n\
- GrpNodes, GrpSubmitJob, GrpWall, ID, LFT, \n\
- MaxCPUMins, MaxCPURunMins, MaxCPUs, MaxJobs, \n\
- MaxNodes, MaxSubmitJobs, MaxWall, QOS, ParentID,\n\
- ParentName, Partition, RawQOS, RGT, User \n\
+ GrpMemory, GrpNodes, GrpSubmitJob, GrpWall, \n\
+ ID, LFT, MaxCPUMins, MaxCPURunMins, MaxCPUs, \n\
+ MaxJobs, MaxNodes, MaxSubmitJobs, MaxWall, QOS,\n\
+ ParentID, ParentName, Partition, RawQOS, RGT, \n\
+ User \n\
\n\
Cluster - Classification, Cluster, ClusterNodes, \n\
ControlHost, ControlPort, CpuCount, DefaultQOS,\n\
Fairshare, Flags, GrpCPUMins, GrpCPUs, GrpJobs,\n\
- GrpNodes, GrpSubmitJob, MaxCPUMins, \n\
+ GrpMemory, GrpNodes, GrpSubmitJob, MaxCPUMins, \n\
MaxCPUs, MaxJobs, MaxNodes, MaxSubmitJobs, \n\
MaxWall, NodeCount, PluginIDSelect, RPC \n\
\n\
@@ -943,8 +946,8 @@
State, StateRaw, User \n\
\n\
QOS - Description, Flags, GraceTime, GrpCPUMins, \n\
- GrpCPURunMins, GrpCPUs, GrpJobs, GrpNodes, \n\
- GrpSubmitJob, GrpWall, ID, MaxCPUMins, \n\
+ GrpCPURunMins, GrpCPUs, GrpJobs, GrpMemory, \n\
+ GrpNodes, GrpSubmitJob, GrpWall, ID, MaxCPUMins,\n\
MaxCPURunMins, MaxCPUs, MaxCPUsPerUser, \n\
MaxJobs, MaxNodes, MaxNodesPerUser, \n\
MaxSubmitJobs, MaxWall, Name, \n\
diff --git a/src/sacctmgr/sacctmgr.h b/src/sacctmgr/sacctmgr.h
index 41ac729..bb8d9b2 100644
--- a/src/sacctmgr/sacctmgr.h
+++ b/src/sacctmgr/sacctmgr.h
@@ -107,6 +107,7 @@
PRINT_GRPCRM,
PRINT_GRPC,
PRINT_GRPJ,
+ PRINT_GRPMEM,
PRINT_GRPN,
PRINT_GRPS,
PRINT_GRPW,
diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c
index c3a8297..e26d845 100644
--- a/src/sacctmgr/user_functions.c
+++ b/src/sacctmgr/user_functions.c
@@ -1065,6 +1065,7 @@
start_assoc.grp_cpu_mins;
assoc->grp_cpus = start_assoc.grp_cpus;
assoc->grp_jobs = start_assoc.grp_jobs;
+ assoc->grp_mem = start_assoc.grp_mem;
assoc->grp_nodes =
start_assoc.grp_nodes;
assoc->grp_submit_jobs =
@@ -1133,6 +1134,7 @@
start_assoc.grp_cpu_mins;
assoc->grp_cpus = start_assoc.grp_cpus;
assoc->grp_jobs = start_assoc.grp_jobs;
+ assoc->grp_mem = start_assoc.grp_mem;
assoc->grp_nodes = start_assoc.grp_nodes;
assoc->grp_submit_jobs =
start_assoc.grp_submit_jobs;
@@ -1553,7 +1555,7 @@
name);
break;
}
-
+
/* This means there wasn't a
default on the
current cluster.
@@ -1931,13 +1933,14 @@
}
notice_thread_init();
- if(cond_set == 1) {
+ if (cond_set == 1) {
ret_list = acct_storage_g_remove_users(
db_conn, my_uid, user_cond);
- } else if(cond_set & 2) {
+ } else if (cond_set & 2) {
ret_list = acct_storage_g_remove_associations(
db_conn, my_uid, user_cond->assoc_cond);
}
+
rc = errno;
notice_thread_fini();
@@ -1945,6 +1948,7 @@
if(ret_list && list_count(ret_list)) {
char *object = NULL;
+ List del_user_list = NULL;
ListIterator itr = list_iterator_create(ret_list);
/* If there were jobs running with an association to
be deleted, don't.
@@ -1967,8 +1971,82 @@
}
while((object = list_next(itr))) {
printf(" %s\n", object);
+ if (cond_set & 2) {
+ if (!del_user_list)
+ del_user_list = list_create(
+ slurm_destroy_char);
+ slurm_addto_char_list(del_user_list,
+ strstr(object, "U = ")+4);
+ }
}
list_iterator_destroy(itr);
+
+ /* Remove user if no associations left. */
+ if (cond_set & 2 && del_user_list) {
+ List user_list = NULL;
+ slurmdb_user_cond_t del_user_cond;
+ slurmdb_association_cond_t del_user_assoc_cond;
+ slurmdb_user_rec_t *user = NULL;
+
+ /* Use a fresh cond here so we check all
+ clusters and such to make sure there are no
+ associations.
+ */
+ memset(&del_user_cond, 0, sizeof(slurmdb_user_cond_t));
+ memset(&del_user_assoc_cond, 0,
+ sizeof(slurmdb_association_cond_t));
+ del_user_cond.with_assocs = 1;
+ del_user_assoc_cond.user_list = del_user_list;
+ /* No need to get all the extra info about the
+ association, just want to know if it
+ exists.
+ */
+ del_user_assoc_cond.without_parent_info = 1;
+ del_user_cond.assoc_cond = &del_user_assoc_cond;
+ user_list = acct_storage_g_get_users(
+ db_conn, my_uid, &del_user_cond);
+ list_destroy(del_user_list);
+ del_user_list = NULL;
+
+ if (user_list) {
+ itr = list_iterator_create(user_list);
+ while ((user = list_next(itr))) {
+ if (user->assoc_list)
+ continue;
+ if (!del_user_list) {
+ del_user_list = list_create(
+ slurm_destroy_char);
+ printf(" Deleting users "
+ "(No Associations)"
+ "...\n");
+ }
+ printf(" %s\n", user->name);
+ slurm_addto_char_list(del_user_list,
+ user->name);
+ }
+ list_iterator_destroy(itr);
+ list_destroy(user_list);
+ }
+
+ if (del_user_list) {
+ List del_user_ret_list = NULL;
+
+ memset(&del_user_cond, 0,
+ sizeof(slurmdb_user_cond_t));
+ memset(&del_user_assoc_cond, 0,
+ sizeof(slurmdb_association_cond_t));
+
+ del_user_assoc_cond.user_list = del_user_list;
+ del_user_cond.assoc_cond = &del_user_assoc_cond;
+
+ del_user_ret_list = acct_storage_g_remove_users(
+ db_conn, my_uid, &del_user_cond);
+ if (del_user_ret_list)
+ list_destroy(del_user_ret_list);
+ list_destroy(del_user_list);
+ }
+ }
+
if(commit_check("Would you like to commit changes?")) {
acct_storage_g_commit(db_conn, 1);
} else {
@@ -1984,8 +2062,7 @@
rc = SLURM_ERROR;
}
-
- if(ret_list)
+ if (ret_list)
list_destroy(ret_list);
return rc;
@@ -2067,7 +2144,7 @@
printf(" Removing all users from Accounts\n%s", acct_str);
notice_thread_init();
- ret_list = acct_storage_g_remove_coord(db_conn, my_uid,
+ ret_list = acct_storage_g_remove_coord(db_conn, my_uid,
user_cond->assoc_cond->acct_list,
user_cond);
slurmdb_destroy_user_cond(user_cond);
diff --git a/src/salloc/Makefile.in b/src/salloc/Makefile.in
index 82a21c7..2b9f81e 100644
--- a/src/salloc/Makefile.in
+++ b/src/salloc/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -192,6 +192,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -228,6 +229,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -399,7 +401,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-salloc$(EXEEXT): $(salloc_OBJECTS) $(salloc_DEPENDENCIES)
+salloc$(EXEEXT): $(salloc_OBJECTS) $(salloc_DEPENDENCIES) $(EXTRA_salloc_DEPENDENCIES)
@rm -f salloc$(EXEEXT)
$(salloc_LINK) $(salloc_OBJECTS) $(salloc_LDADD) $(LIBS)
@@ -538,10 +540,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/salloc/opt.c b/src/salloc/opt.c
index 4d9f962..73d5fc5 100644
--- a/src/salloc/opt.c
+++ b/src/salloc/opt.c
@@ -113,6 +113,7 @@
#define OPT_WCKEY 0x14
#define OPT_SIGNAL 0x15
#define OPT_KILL_CMD 0x16
+#define OPT_TIME_VAL 0x17
/* generic getopt_long flags, integers and *not* valid characters */
#define LONG_OPT_CPU_BIND 0x101
@@ -405,7 +406,7 @@
{"SALLOC_WAIT_ALL_NODES",OPT_INT, &opt.wait_all_nodes,NULL },
{"SALLOC_WCKEY", OPT_STRING, &opt.wckey, NULL },
{"SALLOC_REQ_SWITCH", OPT_INT, &opt.req_switch, NULL },
- {"SALLOC_WAIT4SWITCH", OPT_INT, &opt.wait4switch, NULL },
+ {"SALLOC_WAIT4SWITCH", OPT_TIME_VAL, NULL, NULL },
{NULL, 0, NULL, NULL}
};
@@ -558,6 +559,11 @@
}
opt.kill_command_signal_set = true;
break;
+
+ case OPT_TIME_VAL:
+ opt.wait4switch = time_str2secs(val);
+ break;
+
default:
/* do nothing */
break;
@@ -945,7 +951,7 @@
error("duplicate --gid option");
exit(error_exit);
}
- if (gid_from_string (optarg, &opt.euid) < 0) {
+ if (gid_from_string (optarg, &opt.egid) < 0) {
error("--gid=\"%s\" invalid", optarg);
exit(error_exit);
}
@@ -1060,6 +1066,13 @@
}
break;
case LONG_OPT_REBOOT:
+#if defined HAVE_BG && !defined HAVE_BG_L_P
+ info("WARNING: If your job is smaller than the block "
+ "it is going to run on and other jobs are "
+ "running on it the --reboot option will not be "
+ "honored. If this is the case, contact your "
+ "admin to reboot the block for you.");
+#endif
opt.reboot = true;
break;
case LONG_OPT_BLRTS_IMAGE:
@@ -1141,8 +1154,7 @@
if (pos_delimit != NULL) {
pos_delimit[0] = '\0';
pos_delimit++;
- opt.wait4switch = time_str2mins(pos_delimit) *
- 60;
+ opt.wait4switch = time_str2secs(pos_delimit);
}
opt.req_switch = _get_int(optarg, "switches");
break;
@@ -1803,7 +1815,12 @@
" [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n"
" [--account=name] [--dependency=type:jobid] [--comment=name]\n"
#ifdef HAVE_BG /* Blue gene specific options */
-" [--geometry=XxYxZ] [--conn-type=type] [--no-rotate] [ --reboot]\n"
+#ifdef HAVE_BG_L_P
+" [--geometry=XxYxZ] "
+#else
+" [--geometry=AxXxYxZ] "
+#endif
+"[--conn-type=type] [--no-rotate] [--reboot]\n"
#ifdef HAVE_BGL
" [--blrts-image=path] [--linux-image=path]\n"
" [--mloader-image=path] [--ramdisk-image=path]\n"
@@ -1920,7 +1937,13 @@
#endif
#ifdef HAVE_BG /* Blue gene specific options */
"Blue Gene related options:\n"
+#ifdef HAVE_BG_L_P
" -g, --geometry=XxYxZ geometry constraints of the job\n"
+#else
+" -g, --geometry=AxXxYxZ Midplane geometry constraints of the job,\n"
+" sub-block allocations can not be allocated\n"
+" with the geometry option\n"
+#endif
" -R, --no-rotate disable geometry rotation\n"
" --reboot reboot block before starting job\n"
" --conn-type=type constraint on type of connection, MESH or TORUS\n"
diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c
index e6b9164..d2e3629 100644
--- a/src/salloc/salloc.c
+++ b/src/salloc/salloc.c
@@ -42,6 +42,7 @@
# include "config.h"
#endif
+#include <sys/resource.h> /* for struct rlimit */
#include <dirent.h>
#include <fcntl.h>
#include <pwd.h>
@@ -361,7 +362,7 @@
}
#else
if (!_wait_nodes_ready(alloc)) {
- if(!allocation_interrupted)
+ if (!allocation_interrupted)
error("Something is wrong with the "
"boot of the nodes.");
goto relinquish;
@@ -584,7 +585,6 @@
/* Returns 0 on success, -1 on failure */
static int _fill_job_desc_from_opts(job_desc_msg_t *desc)
{
- int i;
#ifdef HAVE_REAL_CRAY
uint64_t pagg_id = job_getjid(getpid());
/*
@@ -621,7 +621,7 @@
desc->exc_nodes = opt.exc_nodes;
desc->partition = opt.partition;
desc->min_nodes = opt.min_nodes;
- if (opt.max_nodes)
+ if (opt.nodes_set)
desc->max_nodes = opt.max_nodes;
desc->user_id = opt.uid;
desc->group_id = opt.gid;
@@ -670,15 +670,13 @@
desc->priority = 0;
#ifdef HAVE_BG
if (opt.geometry[0] > 0) {
- for (i=0; i<SYSTEM_DIMENSIONS; i++)
+ int i;
+ for (i = 0; i < SYSTEM_DIMENSIONS; i++)
desc->geometry[i] = opt.geometry[i];
}
#endif
- for (i=0; i<HIGHEST_DIMENSIONS; i++) {
- if (opt.conn_type[i] == (uint16_t)NO_VAL)
- break;
- desc->conn_type[i] = opt.conn_type[i];
- }
+ memcpy(desc->conn_type, opt.conn_type, sizeof(desc->conn_type));
+
if (opt.reboot)
desc->reboot = 1;
if (opt.no_rotate)
@@ -815,7 +813,7 @@
{
allocation_interrupted = true;
if (pending_job_id != 0) {
- slurm_complete_job(pending_job_id, 0);
+ slurm_complete_job(pending_job_id, NO_VAL);
}
}
@@ -1075,13 +1073,17 @@
suspend_time = slurm_get_suspend_timeout();
resume_time = slurm_get_resume_timeout();
- if ((suspend_time == 0) || (resume_time == 0))
- return 1; /* Power save mode disabled */
- max_delay = suspend_time + resume_time;
- max_delay *= 5; /* Allow for ResumeRate support */
+ if (suspend_time || resume_time) {
+ max_delay = suspend_time + resume_time;
+ max_delay *= 5; /* Allow for ResumeRate support */
+ } else {
+ max_delay = 300; /* Wait to 5 min for PrologSlurmctld */
+ }
pending_job_id = alloc->job_id;
+ if (alloc->alias_list && !strcmp(alloc->alias_list, "TBD"))
+ opt.wait_all_nodes = 1; /* Wait for boot & addresses */
if (opt.wait_all_nodes == (uint16_t) NO_VAL)
opt.wait_all_nodes = DEFAULT_WAIT_ALL_NODES;
@@ -1095,20 +1097,15 @@
cur_delay += POLL_SLEEP;
}
- if (opt.wait_all_nodes)
- rc = slurm_job_node_ready(alloc->job_id);
- else {
- is_ready = 1;
- break;
- }
-
+ rc = slurm_job_node_ready(alloc->job_id);
if (rc == READY_JOB_FATAL)
break; /* fatal error */
if ((rc == READY_JOB_ERROR) || (rc == EAGAIN))
continue; /* retry */
if ((rc & READY_JOB_STATE) == 0) /* job killed */
break;
- if (rc & READY_NODE_STATE) { /* job and node ready */
+ if ((rc & READY_JOB_STATE) &&
+ ((rc & READY_NODE_STATE) || !opt.wait_all_nodes)) {
is_ready = 1;
break;
}
@@ -1116,8 +1113,18 @@
break;
}
if (is_ready) {
+ resource_allocation_response_msg_t *resp;
+ char *tmp_str;
if (i > 0)
- info ("Nodes %s are ready for job", alloc->node_list);
+ info("Nodes %s are ready for job", alloc->node_list);
+ if (alloc->alias_list && !strcmp(alloc->alias_list, "TBD") &&
+ (slurm_allocation_lookup_lite(pending_job_id, &resp)
+ == SLURM_SUCCESS)) {
+ tmp_str = alloc->alias_list;
+ alloc->alias_list = resp->alias_list;
+ resp->alias_list = tmp_str;
+ slurm_free_resource_allocation_response_msg(resp);
+ }
} else if (!allocation_interrupted)
error("Nodes %s are still not ready", alloc->node_list);
else /* allocation_interrupted or slurmctld not responing */
diff --git a/src/sattach/Makefile.in b/src/sattach/Makefile.in
index 1b423e0..049ad63 100644
--- a/src/sattach/Makefile.in
+++ b/src/sattach/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -192,6 +192,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -228,6 +229,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -405,7 +407,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sattach$(EXEEXT): $(sattach_OBJECTS) $(sattach_DEPENDENCIES)
+sattach$(EXEEXT): $(sattach_OBJECTS) $(sattach_DEPENDENCIES) $(EXTRA_sattach_DEPENDENCIES)
@rm -f sattach$(EXEEXT)
$(sattach_LINK) $(sattach_OBJECTS) $(sattach_LDADD) $(LIBS)
@@ -546,10 +548,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/sattach/sattach.c b/src/sattach/sattach.c
index 697f424..3a8b45e 100644
--- a/src/sattach/sattach.c
+++ b/src/sattach/sattach.c
@@ -456,7 +456,7 @@
mts->resp_port = xmalloc(sizeof(uint16_t) * mts->num_resp_port);
for (i = 0; i < mts->num_resp_port; i++) {
if (net_stream_listen(&sock, &port) < 0) {
- error("unable to intialize step launch"
+ error("unable to initialize step launch"
" listening socket: %m");
goto fail;
}
diff --git a/src/sbatch/Makefile.in b/src/sbatch/Makefile.in
index 64b9fbb..a767b2c 100644
--- a/src/sbatch/Makefile.in
+++ b/src/sbatch/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -192,6 +192,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -228,6 +229,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -399,7 +401,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sbatch$(EXEEXT): $(sbatch_OBJECTS) $(sbatch_DEPENDENCIES)
+sbatch$(EXEEXT): $(sbatch_OBJECTS) $(sbatch_DEPENDENCIES) $(EXTRA_sbatch_DEPENDENCIES)
@rm -f sbatch$(EXEEXT)
$(sbatch_LINK) $(sbatch_OBJECTS) $(sbatch_LDADD) $(LIBS)
@@ -539,10 +541,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c
index 84b5f57..a13b434 100644
--- a/src/sbatch/opt.c
+++ b/src/sbatch/opt.c
@@ -114,6 +114,7 @@
#define OPT_GET_USER_ENV 0x16
#define OPT_EXPORT 0x17
#define OPT_CLUSTERS 0x18
+#define OPT_TIME_VAL 0x19
/* generic getopt_long flags, integers and *not* valid characters */
#define LONG_OPT_PROPAGATE 0x100
@@ -479,7 +480,7 @@
{"SBATCH_GET_USER_ENV", OPT_GET_USER_ENV, NULL, NULL },
{"SBATCH_EXPORT", OPT_STRING, &opt.export_env, NULL },
{"SBATCH_REQ_SWITCH", OPT_INT, &opt.req_switch, NULL },
- {"SBATCH_WAIT4SWITCH", OPT_INT, &opt.wait4switch, NULL },
+ {"SBATCH_WAIT4SWITCH", OPT_TIME_VAL, NULL, NULL },
{NULL, 0, NULL, NULL}
};
@@ -648,6 +649,9 @@
exit(1);
}
break;
+ case OPT_TIME_VAL:
+ opt.wait4switch = time_str2secs(val);
+ break;
default:
/* do nothing */
break;
@@ -1179,7 +1183,7 @@
break;
case 'e':
xfree(opt.efname);
- if (strncasecmp(optarg, "none", (size_t)4) == 0)
+ if (strcasecmp(optarg, "none") == 0)
opt.efname = xstrdup("/dev/null");
else
opt.efname = xstrdup(optarg);
@@ -1208,7 +1212,7 @@
break;
case 'i':
xfree(opt.ifname);
- if (strncasecmp(optarg, "none", (size_t)4) == 0)
+ if (strcasecmp(optarg, "none") == 0)
opt.ifname = xstrdup("/dev/null");
else
opt.ifname = xstrdup(optarg);
@@ -1267,7 +1271,7 @@
break;
case 'o':
xfree(opt.ofname);
- if (strncasecmp(optarg, "none", (size_t)4) == 0)
+ if (strcasecmp(optarg, "none") == 0)
opt.ofname = xstrdup("/dev/null");
else
opt.ofname = xstrdup(optarg);
@@ -1555,6 +1559,13 @@
opt.ramdiskimage = xstrdup(optarg);
break;
case LONG_OPT_REBOOT:
+#if defined HAVE_BG && !defined HAVE_BG_L_P
+ info("WARNING: If your job is smaller than the block "
+ "it is going to run on and other jobs are "
+ "running on it the --reboot option will not be "
+ "honored. If this is the case, contact your "
+ "admin to reboot the block for you.");
+#endif
opt.reboot = true;
break;
case LONG_OPT_WRAP:
@@ -1643,7 +1654,7 @@
if (pos_delimit != NULL) {
pos_delimit[0] = '\0';
pos_delimit++;
- opt.wait4switch = time_str2mins(pos_delimit) * 60;
+ opt.wait4switch = time_str2secs(pos_delimit);
}
opt.req_switch = _get_int(optarg, "switches");
break;
@@ -1737,7 +1748,7 @@
break;
case 'e':
xfree(opt.efname);
- if (strncasecmp(optarg, "none", (size_t) 4) == 0)
+ if (strcasecmp(optarg, "none") == 0)
opt.efname = xstrdup("/dev/null");
else
opt.efname = xstrdup(optarg);
@@ -1771,7 +1782,7 @@
break;
case 'o':
xfree(opt.ofname);
- if (strncasecmp(optarg, "none", (size_t) 4) == 0)
+ if (strcasecmp(optarg, "none") == 0)
opt.ofname = xstrdup("/dev/null");
else
opt.ofname = xstrdup(optarg);
@@ -2150,7 +2161,7 @@
setenv("SLURM_JOB_NAME", opt.job_name, 0);
/* check for realistic arguments */
- if (opt.ntasks <= 0) {
+ if (opt.ntasks < 0) {
error("invalid number of tasks (-n %d)", opt.ntasks);
verified = false;
}
@@ -2354,8 +2365,7 @@
if(!opt.nodes_set) {
opt.nodes_set = 1;
hostlist_uniq(hl);
- opt.min_nodes = opt.max_nodes
- = hostlist_count(hl);
+ opt.min_nodes = opt.max_nodes = hostlist_count(hl);
}
hostlist_destroy(hl);
}
@@ -2788,7 +2798,12 @@
" [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n"
" [--account=name] [--dependency=type:jobid] [--comment=name]\n"
#ifdef HAVE_BG /* Blue gene specific options */
-" [--geometry=XxYxZ] [--conn-type=type] [--no-rotate] [ --reboot]\n"
+#ifdef HAVE_BG_L_P
+" [--geometry=XxYxZ] "
+#else
+" [--geometry=AxXxYxZ] "
+#endif
+"[--conn-type=type] [--no-rotate] [--reboot]\n"
#ifdef HAVE_BGL
" [--blrts-image=path] [--linux-image=path]\n"
" [--mloader-image=path] [--ramdisk-image=path]\n"
@@ -2913,7 +2928,13 @@
#endif
#ifdef HAVE_BG /* Blue gene specific options */
"Blue Gene related options:\n"
+#ifdef HAVE_BG_L_P
" -g, --geometry=XxYxZ geometry constraints of the job\n"
+#else
+" -g, --geometry=AxXxYxZ Midplane geometry constraints of the job,\n"
+" sub-block allocations can not be allocated\n"
+" with the geometry option\n"
+#endif
" -R, --no-rotate disable geometry rotation\n"
" --reboot reboot block before starting job\n"
" --conn-type=type constraint on type of connection, MESH or TORUS\n"
diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c
index 07d2351..0a2d87d 100644
--- a/src/sbatch/sbatch.c
+++ b/src/sbatch/sbatch.c
@@ -41,6 +41,7 @@
# include "config.h"
#endif
+#include <sys/resource.h> /* for RLIMIT_NOFILE */
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
@@ -90,7 +91,7 @@
_set_exit_code();
if (spank_init_allocator() < 0) {
- error("Failed to intialize plugin stack");
+ error("Failed to initialize plugin stack");
exit(error_exit);
}
@@ -259,7 +260,7 @@
/* Returns 0 on success, -1 on failure */
static int _fill_job_desc_from_opts(job_desc_msg_t *desc)
{
- int i;
+ int i;
extern char **environ;
if (opt.jobid_set)
@@ -278,12 +279,14 @@
desc->req_nodes = opt.nodelist;
desc->exc_nodes = opt.exc_nodes;
desc->partition = opt.partition;
- if (opt.min_nodes)
- desc->min_nodes = opt.min_nodes;
if (opt.licenses)
desc->licenses = xstrdup(opt.licenses);
- if (opt.max_nodes)
- desc->max_nodes = opt.max_nodes;
+ if (opt.nodes_set) {
+ desc->min_nodes = opt.min_nodes;
+ if (opt.max_nodes)
+ desc->max_nodes = opt.max_nodes;
+ } else if (opt.ntasks_set && (opt.ntasks == 0))
+ desc->min_nodes = 0;
if (opt.ntasks_per_node)
desc->ntasks_per_node = opt.ntasks_per_node;
desc->user_id = opt.uid;
@@ -328,11 +331,8 @@
desc->geometry[i] = opt.geometry[i];
}
- for (i=0; i<HIGHEST_DIMENSIONS; i++) {
- if (opt.conn_type[i] == (uint16_t)NO_VAL)
- break;
- desc->conn_type[i] = opt.conn_type[i];
- }
+ memcpy(desc->conn_type, opt.conn_type, sizeof(desc->conn_type));
+
if (opt.reboot)
desc->reboot = 1;
if (opt.no_rotate)
@@ -360,9 +360,10 @@
desc->overcommit = opt.overcommit;
} else if (opt.cpus_set)
desc->min_cpus = opt.ntasks * opt.cpus_per_task;
+ else if (opt.nodes_set && (opt.min_nodes == 0))
+ desc->min_cpus = 0;
else
desc->min_cpus = opt.ntasks;
- desc->max_cpus = desc->max_cpus;
if (opt.ntasks_set)
desc->num_tasks = opt.ntasks;
@@ -417,7 +418,7 @@
"SLURM_GET_USER_ENV", "1");
}
- if(opt.distribution == SLURM_DIST_ARBITRARY) {
+ if (opt.distribution == SLURM_DIST_ARBITRARY) {
env_array_overwrite_fmt(&desc->environment,
"SLURM_ARBITRARY_NODELIST",
"%s", desc->req_nodes);
diff --git a/src/sbcast/Makefile.in b/src/sbcast/Makefile.in
index f35a0d3..ce678c7 100644
--- a/src/sbcast/Makefile.in
+++ b/src/sbcast/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -194,6 +194,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -230,6 +231,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -400,7 +402,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sbcast$(EXEEXT): $(sbcast_OBJECTS) $(sbcast_DEPENDENCIES)
+sbcast$(EXEEXT): $(sbcast_OBJECTS) $(sbcast_DEPENDENCIES) $(EXTRA_sbcast_DEPENDENCIES)
@rm -f sbcast$(EXEEXT)
$(sbcast_LINK) $(sbcast_OBJECTS) $(sbcast_LDADD) $(LIBS)
@@ -540,10 +542,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/scancel/Makefile.in b/src/scancel/Makefile.in
index 3533dbb..387938f 100644
--- a/src/scancel/Makefile.in
+++ b/src/scancel/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -193,6 +193,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -229,6 +230,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -399,7 +401,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-scancel$(EXEEXT): $(scancel_OBJECTS) $(scancel_DEPENDENCIES)
+scancel$(EXEEXT): $(scancel_OBJECTS) $(scancel_DEPENDENCIES) $(EXTRA_scancel_DEPENDENCIES)
@rm -f scancel$(EXEEXT)
$(scancel_LINK) $(scancel_OBJECTS) $(scancel_LDADD) $(LIBS)
@@ -538,10 +540,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/scancel/opt.c b/src/scancel/opt.c
index 445e9f0..875c0d5 100644
--- a/src/scancel/opt.c
+++ b/src/scancel/opt.c
@@ -203,7 +203,7 @@
/*
* opt_default(): used by initialize_and_process_args to set defaults
*/
-static void _opt_default()
+static void _opt_default(void)
{
opt.account = NULL;
opt.batch = false;
@@ -233,7 +233,7 @@
* environment variables. See comments above for how to
* extend srun to process different vars
*/
-static void _opt_env()
+static void _opt_env(void)
{
char *val;
@@ -548,7 +548,7 @@
printf("Usage: scancel [OPTIONS] [job_id[.step_id]]\n");
printf(" -A, --account=account act only on jobs charging this account\n");
printf(" -b, --batch signal batch shell for specified job\n");
-/* printf(" --ctld route request through slurmctld\n"); */
+/* printf(" --ctld send request directly to slurmctld\n"); */
printf(" -i, --interactive require response from user for each job\n");
printf(" -n, --name=job_name act only on jobs with this name\n");
printf(" -p, --partition=partition act only on jobs in this partition\n");
diff --git a/src/scancel/scancel.c b/src/scancel/scancel.c
index 7e8920f..00f58d9 100644
--- a/src/scancel/scancel.c
+++ b/src/scancel/scancel.c
@@ -362,7 +362,6 @@
for (j = 0; j < opt.job_cnt; j++ ) {
if (job_ptr[i].job_id != opt.job_id[j])
continue;
-
if (opt.interactive &&
(_confirmation(i, opt.step_id[j]) == 0))
continue;
@@ -611,7 +610,6 @@
job_ptr[i].job_id, step_id, job_ptr[i].name,
job_ptr[i].partition);
}
-
if (fgets(in_line, sizeof(in_line), stdin) == NULL)
continue;
if ((in_line[0] == 'y') || (in_line[0] == 'Y'))
diff --git a/src/scontrol/Makefile.in b/src/scontrol/Makefile.in
index a714dc1..a4db832 100644
--- a/src/scontrol/Makefile.in
+++ b/src/scontrol/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -195,6 +195,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -231,6 +232,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -417,7 +419,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-scontrol$(EXEEXT): $(scontrol_OBJECTS) $(scontrol_DEPENDENCIES)
+scontrol$(EXEEXT): $(scontrol_OBJECTS) $(scontrol_DEPENDENCIES) $(EXTRA_scontrol_DEPENDENCIES)
@rm -f scontrol$(EXEEXT)
$(scontrol_LINK) $(scontrol_OBJECTS) $(scontrol_LDADD) $(LIBS)
@@ -565,10 +567,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/scontrol/create_res.c b/src/scontrol/create_res.c
index 124ff16..ca68f1d 100644
--- a/src/scontrol/create_res.c
+++ b/src/scontrol/create_res.c
@@ -76,7 +76,7 @@
/*
* _parse_flags is used to parse the Flags= option. It handles
- * daily, weekly, and maint, optionally preceded by + or -,
+ * daily, weekly, static_alloc, and maint, optionally preceded by + or -,
* separated by a comma but no spaces.
*/
static uint32_t _parse_flags(const char *flagstr, const char *msg)
@@ -137,6 +137,13 @@
outflags |= RESERVE_FLAG_NO_LIC_ONLY;
else
outflags |= RESERVE_FLAG_LIC_ONLY;
+ } else if (strncasecmp(curr, "Static_Alloc", MAX(taglen,1))
+ == 0) {
+ curr += taglen;
+ if (flip)
+ outflags |= RESERVE_FLAG_NO_STATIC;
+ else
+ outflags |= RESERVE_FLAG_STATIC;
} else {
error("Error parsing flags %s. %s", flagstr, msg);
return 0xffffffff;
@@ -244,19 +251,38 @@
}
} else if (strncasecmp(tag, "NodeCnt", MAX(taglen,5)) == 0 ||
strncasecmp(tag, "NodeCount", MAX(taglen,5)) == 0) {
- char *endptr = NULL;
- resv_msg_ptr->node_cnt = strtol(val, &endptr, 10);
- if (endptr != NULL &&
- ((endptr[0] == 'k') || (endptr[0] == 'K'))) {
- resv_msg_ptr->node_cnt *= 1024;
-
- } else if ((endptr == NULL) || (endptr[0] != '\0') ||
- (val[0] == '\0')) {
- exit_code = 1;
- error("Invalid node count %s. %s",
- argv[i], msg);
- return -1;
+ char *endptr = NULL, *node_cnt, *tok, *ptrptr = NULL;
+ int node_inx = 0;
+ node_cnt = xstrdup(val);
+ tok = strtok_r(node_cnt, ",", &ptrptr);
+ while (tok) {
+ xrealloc(resv_msg_ptr->node_cnt,
+ sizeof(uint32_t) * (node_inx + 2));
+ resv_msg_ptr->node_cnt[node_inx] =
+ strtol(tok, &endptr, 10);
+ if ((endptr != NULL) &&
+ ((endptr[0] == 'k') ||
+ (endptr[0] == 'K'))) {
+ resv_msg_ptr->node_cnt[node_inx] *=
+ 1024;
+ } else if ((endptr != NULL) &&
+ ((endptr[0] == 'm') ||
+ (endptr[0] == 'M'))) {
+ resv_msg_ptr->node_cnt[node_inx] *=
+ 1024 * 1024;
+ } else if ((endptr == NULL) ||
+ (endptr[0] != '\0') ||
+ (tok[0] == '\0')) {
+ exit_code = 1;
+ error("Invalid node count %s. %s",
+ argv[i], msg);
+ xfree(node_cnt);
+ return -1;
+ }
+ node_inx++;
+ tok = strtok_r(NULL, ",", &ptrptr);
}
+ xfree(node_cnt);
} else if (strncasecmp(tag, "Nodes", MAX(taglen, 5)) == 0) {
resv_msg_ptr->node_list = val;
@@ -347,6 +373,36 @@
/*
+ * Determine total node count for named partition.
+ */
+static uint32_t _partition_node_count(char *partition_name)
+{
+ int error_code, i;
+ uint16_t show_flags = 0;
+ uint32_t node_count = 0;
+ partition_info_msg_t *part_info_ptr = NULL;
+ partition_info_t *part_ptr = NULL;
+
+ error_code = slurm_load_partitions((time_t) NULL,
+ &part_info_ptr, show_flags);
+ if (error_code != SLURM_SUCCESS) {
+ slurm_free_partition_info_msg (part_info_ptr);
+ return NO_VAL;
+ }
+
+	part_ptr = part_info_ptr->partition_array;
+ for (i = 0; i < part_info_ptr->record_count; i++) {
+ if (strcmp (partition_name, part_ptr[i].name))
+ continue;
+ node_count = part_ptr[i].total_nodes;
+ }
+ slurm_free_partition_info_msg (part_info_ptr);
+ return node_count;
+}
+
+
+
+/*
* scontrol_create_res - create the slurm reservation configuration per the
* supplied arguments
* IN argc - count of arguments
@@ -357,9 +413,11 @@
extern int
scontrol_create_res(int argc, char *argv[])
{
- resv_desc_msg_t resv_msg;
+ resv_desc_msg_t resv_msg;
char *new_res_name = NULL;
int free_user_str = 0, free_acct_str = 0;
+ int free_node_cnt = 0;
+ uint32_t node_count = 0;
int err, ret = 0;
slurm_init_resv_desc_msg (&resv_msg);
@@ -396,13 +454,50 @@
"No reservation created.");
goto SCONTROL_CREATE_RES_CLEANUP;
}
- if (resv_msg.node_cnt == NO_VAL &&
+ /*
+ * If "all" is specified for the nodes and a partition is specified,
+ * only allocate all of the nodes in the partition.
+ */
+
+ if ((resv_msg.partition != NULL) && (resv_msg.node_list != NULL) &&
+ (strcasecmp(resv_msg.node_list, "ALL") == 0)) {
+ node_count = _partition_node_count(resv_msg.partition);
+ if (node_count == NO_VAL) {
+ exit_code = 1;
+ error("Can not determine node count for partition. "
+ "No reservation created.");
+ goto SCONTROL_CREATE_RES_CLEANUP;
+ } else {
+ free_node_cnt = 1;
+ resv_msg.node_cnt = xmalloc(sizeof(uint32_t) * 2);
+ *resv_msg.node_cnt = node_count;
+ resv_msg.node_list = NULL;
+ }
+ }
+
+ /*
+ * If the following parameters are null, but a partition is named, then
+ * make the reservation for the whole partition.
+ */
+ if ((resv_msg.node_cnt == NULL || resv_msg.node_cnt[0] == 0) &&
(resv_msg.node_list == NULL || resv_msg.node_list[0] == '\0') &&
(resv_msg.licenses == NULL || resv_msg.licenses[0] == '\0')) {
- exit_code = 1;
- error("Nodes, NodeCnt or Licenses must be specified. "
- "No reservation created.");
- goto SCONTROL_CREATE_RES_CLEANUP;
+ if (resv_msg.partition == NULL) {
+ exit_code = 1;
+ error("Nodes, NodeCnt or Licenses must be specified. "
+ "No reservation created.");
+ goto SCONTROL_CREATE_RES_CLEANUP;
+ } else if ((node_count = _partition_node_count(resv_msg.partition))
+ == NO_VAL) {
+ exit_code = 1;
+ error("Can not determine node count for partition. "
+ "No reservation created.");
+ goto SCONTROL_CREATE_RES_CLEANUP;
+ } else {
+ free_node_cnt = 1;
+ resv_msg.node_cnt = xmalloc(sizeof(uint32_t) * 2);
+ *resv_msg.node_cnt = node_count;
+ }
}
if ((resv_msg.users == NULL || resv_msg.users[0] == '\0') &&
(resv_msg.accounts == NULL || resv_msg.accounts[0] == '\0')) {
@@ -427,5 +522,7 @@
xfree(resv_msg.users);
if (free_acct_str)
xfree(resv_msg.accounts);
+ if (free_node_cnt)
+ xfree(resv_msg.node_cnt);
return ret;
}
diff --git a/src/scontrol/info_job.c b/src/scontrol/info_job.c
index 97caab6..3f9e8b0 100644
--- a/src/scontrol/info_job.c
+++ b/src/scontrol/info_job.c
@@ -84,8 +84,11 @@
if (all_flag)
show_flags |= SHOW_ALL;
- if (detail_flag)
+ if (detail_flag) {
show_flags |= SHOW_DETAIL;
+ if (detail_flag > 1)
+ show_flags |= SHOW_DETAIL2;
+ }
if (old_job_info_ptr) {
if (last_show_flags != show_flags)
diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c
index 5f26181..96a33d7 100644
--- a/src/scontrol/scontrol.c
+++ b/src/scontrol/scontrol.c
@@ -143,7 +143,7 @@
all_flag = 1;
break;
case (int)'d':
- detail_flag = 1;
+ detail_flag++;
break;
case (int)'h':
_usage ();
@@ -552,6 +552,41 @@
}
/*
+ * _reboot_nodes - issue RPC to have computing nodes reboot when idle
+ * RET 0 or a slurm error code
+ */
+static int _reboot_nodes(char *node_list)
+{
+ slurm_ctl_conf_t *conf;
+ int rc;
+ slurm_msg_t msg;
+ reboot_msg_t req;
+
+ conf = slurm_conf_lock();
+ if (conf->reboot_program == NULL) {
+ error("RebootProgram isn't defined");
+ slurm_conf_unlock();
+ slurm_seterrno(SLURM_ERROR);
+ return SLURM_ERROR;
+ }
+ slurm_conf_unlock();
+
+ slurm_msg_t_init(&msg);
+
+ req.node_list = node_list;
+ msg.msg_type = REQUEST_REBOOT_NODES;
+ msg.data = &req;
+
+ if (slurm_send_recv_controller_rc_msg(&msg, &rc) < 0)
+ return SLURM_ERROR;
+
+ if (rc)
+ slurm_seterrno_ret(rc);
+
+ return rc;
+}
+
+/*
* _process_command - process the user's command
* IN argc - count of arguments
* IN argv - the arguments
@@ -671,6 +706,16 @@
}
detail_flag = 1;
}
+ else if (strncasecmp (tag, "script", MAX(tag_len, 3)) == 0) {
+ if (argc > 1) {
+ exit_code = 1;
+ fprintf (stderr,
+ "too many arguments for keyword:%s\n",
+ tag);
+ return 0;
+ }
+ detail_flag = 2;
+ }
else if (strncasecmp (tag, "exit", MAX(tag_len, 1)) == 0) {
if (argc > 1) {
exit_code = 1;
@@ -743,6 +788,22 @@
}
exit_flag = 1;
}
+ else if (strncasecmp (tag, "reboot_nodes", MAX(tag_len, 3)) == 0) {
+ if (argc > 2) {
+ exit_code = 1;
+ fprintf (stderr,
+ "too many arguments for keyword:%s\n",
+ tag);
+ } else if (argc < 2) {
+ error_code = _reboot_nodes("ALL");
+ } else
+ error_code = _reboot_nodes(argv[1]);
+ if (error_code) {
+ exit_code = 1;
+ if (quiet_flag != 1)
+ slurm_perror ("scontrol_reboot_nodes error");
+ }
+ }
else if (strncasecmp (tag, "reconfigure", MAX(tag_len, 3)) == 0) {
if (argc > 2) {
exit_code = 1;
@@ -988,7 +1049,7 @@
}
}
}
- else if (strncasecmp (tag, "schedloglevel", MAX(tag_len, 2)) == 0) {
+ else if (strncasecmp (tag, "schedloglevel", MAX(tag_len, 3)) == 0) {
if (argc > 2) {
exit_code = 1;
if (quiet_flag != 1)
@@ -1405,7 +1466,8 @@
step_tag = 1;
} else if (!strncasecmp(tag, "BlockName", MAX(tag_len, 3))) {
block_tag = 1;
- } else if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 3))) {
+ } else if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 3))
+ || !strncasecmp(tag, "SubMPName", MAX(tag_len, 3))) {
sub_tag = 1;
} else if (!strncasecmp(tag, "FrontendName",
MAX(tag_len, 2))) {
@@ -1416,7 +1478,7 @@
} else if (!strncasecmp(tag, "SlurmctldDebug",
MAX(tag_len, 2))) {
debug_tag= 1;
- }
+ }
}
/* The order of tests matters here. An update job request can include
@@ -1449,7 +1511,7 @@
fprintf(stderr, "No valid entity in update command\n");
fprintf(stderr, "Input line must include \"NodeName\", ");
if(cluster_flags & CLUSTER_FLAG_BG) {
- fprintf(stderr, "\"BlockName\", \"SubBPName\" "
+ fprintf(stderr, "\"BlockName\", \"SubMPName\" "
"(i.e. bgl000[0-3]),");
}
fprintf(stderr, "\"PartitionName\", \"Reservation\", "
@@ -1582,12 +1644,13 @@
vallen = strlen(val);
} else {
exit_code = 1;
- error("Invalid input for BlueGene SubBPName update %s",
+ error("Invalid input for BlueGene SubMPName update %s",
argv[i]);
return 0;
}
- if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 2)))
+ if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 2))
+ || !strncasecmp(tag, "SubMPName", MAX(tag_len, 2)))
block_msg.mp_str = val;
else if (!strncasecmp(tag, "State", MAX(tag_len, 2))) {
if (!strncasecmp(val, "ERROR", MAX(vallen, 1)))
@@ -1605,7 +1668,7 @@
update_cnt++;
} else {
exit_code = 1;
- error("Invalid input for BlueGene SubBPName update %s",
+ error("Invalid input for BlueGene SubMPName update %s",
argv[i]);
return 0;
}
@@ -1707,6 +1770,8 @@
ping print status of slurmctld daemons. \n\
quiet print no messages other than error messages. \n\
quit terminate this command. \n\
+ reboot_nodes [<nodelist>] reboot the nodes when they become idle. \n\
+ By default all nodes are rebooted. \n\
reconfigure re-read configuration files. \n\
release <job_id> permit specified job to start (see hold) \n\
requeue <job_id> re-queue a batch job \n\
diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c
index 68b684f..a09aedf 100644
--- a/src/scontrol/update_job.c
+++ b/src/scontrol/update_job.c
@@ -344,7 +344,6 @@
return rc;
}
-
/*
* scontrol_update_job - update the slurm job configuration per the supplied
* arguments
@@ -388,8 +387,11 @@
}
if (strncasecmp(tag, "JobId", MAX(taglen, 3)) == 0) {
- job_msg.job_id =
- (uint32_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint32(val, &job_msg.job_id)) {
+ error ("Invalid JobId value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
}
else if (strncasecmp(tag, "Comment", MAX(taglen, 3)) == 0) {
job_msg.comment = val;
@@ -443,8 +445,11 @@
update_cnt++;
}
else if (strncasecmp(tag, "Priority", MAX(taglen, 2)) == 0) {
- job_msg.priority =
- (uint32_t) strtoll(val, (char **) NULL, 10);
+ if (parse_uint32(val, &job_msg.priority)) {
+ error ("Invalid Priority value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "Nice", MAX(taglen, 2)) == 0) {
@@ -466,7 +471,7 @@
&max_cpus, false) ||
(min_cpus <= 0) ||
(max_cpus && (max_cpus < min_cpus))) {
- error("Invalid NumCPUs value: %s", val);
+ error ("Invalid NumCPUs value: %s", val);
exit_code = 1;
return 0;
}
@@ -477,13 +482,19 @@
}
/* ReqProcs was removed in SLURM version 2.1 */
else if (strncasecmp(tag, "ReqProcs", MAX(taglen, 8)) == 0) {
- job_msg.num_tasks =
- (uint32_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint32(val, &job_msg.num_tasks)) {
+ error ("Invalid ReqProcs value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "Requeue", MAX(taglen, 4)) == 0) {
- job_msg.requeue =
- (uint16_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint16(val, &job_msg.requeue)) {
+ error ("Invalid Requeue value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
/* ReqNodes was replaced by NumNodes in SLURM version 2.1 */
@@ -509,47 +520,71 @@
update_cnt++;
}
else if (strncasecmp(tag, "ReqSockets", MAX(taglen, 4)) == 0) {
- job_msg.sockets_per_node =
- (uint16_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint16(val, &job_msg.sockets_per_node)) {
+ error ("Invalid ReqSockets value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "ReqCores", MAX(taglen, 4)) == 0) {
- job_msg.cores_per_socket =
- (uint16_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint16(val, &job_msg.cores_per_socket)) {
+ error ("Invalid ReqCores value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "TasksPerNode", MAX(taglen, 2))==0) {
- job_msg.ntasks_per_node =
- (uint16_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint16(val, &job_msg.ntasks_per_node)) {
+ error ("Invalid TasksPerNode value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "ReqThreads", MAX(taglen, 4)) == 0) {
- job_msg.threads_per_core =
- (uint16_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint16(val, &job_msg.threads_per_core)) {
+ error ("Invalid ReqThreads value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "MinCPUsNode", MAX(taglen, 4)) == 0) {
- job_msg.pn_min_cpus =
- (uint32_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint16(val, &job_msg.pn_min_cpus)) {
+ error ("Invalid MinCPUsNode value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "MinMemoryNode",
MAX(taglen, 10)) == 0) {
- job_msg.pn_min_memory =
- (uint32_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint32(val, &job_msg.pn_min_memory)) {
+ error ("Invalid MinMemoryNode value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "MinMemoryCPU",
MAX(taglen, 10)) == 0) {
- job_msg.pn_min_memory =
- (uint32_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint32(val, &job_msg.pn_min_memory)) {
+ error ("Invalid MinMemoryCPU value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
job_msg.pn_min_memory |= MEM_PER_CPU;
update_cnt++;
}
else if (strncasecmp(tag, "MinTmpDiskNode",
MAX(taglen, 5)) == 0) {
- job_msg.pn_min_tmp_disk =
- (uint32_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint32(val, &job_msg.pn_min_tmp_disk)) {
+ error ("Invalid MinTmpDiskNode value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "Partition", MAX(taglen, 2)) == 0) {
@@ -585,8 +620,11 @@
}
else if (strncasecmp(tag, "wait-for-switch", MAX(taglen, 5))
== 0) {
- job_msg.wait4switch =
- (uint32_t) strtol(val, (char **) NULL, 10);
+ if (parse_uint32(val, &job_msg.wait4switch)) {
+ error ("Invalid wait-for-switch value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "Shared", MAX(taglen, 2)) == 0) {
@@ -594,10 +632,11 @@
job_msg.shared = 1;
else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
job_msg.shared = 0;
- else
- job_msg.shared =
- (uint16_t) strtol(val,
- (char **) NULL, 10);
+ else if (parse_uint16(val, &job_msg.shared)) {
+				error ("Invalid Shared value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "Contiguous", MAX(taglen, 3)) == 0) {
@@ -605,10 +644,11 @@
job_msg.contiguous = 1;
else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
job_msg.contiguous = 0;
- else
- job_msg.contiguous =
- (uint16_t) strtol(val,
- (char **) NULL, 10);
+ else if (parse_uint16(val, &job_msg.contiguous)) {
+ error ("Invalid Contiguous value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "ExcNodeList", MAX(taglen, 3)) == 0){
@@ -684,15 +724,15 @@
}
else if (strncasecmp(tag, "Rotate", MAX(taglen, 2)) == 0) {
- uint16_t rotate;
if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0)
- rotate = 1;
+ job_msg.rotate = 1;
else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
- rotate = 0;
- else
- rotate = (uint16_t) strtol(val,
- (char **) NULL, 10);
- job_msg.rotate = rotate;
+ job_msg.rotate = 0;
+ else if (parse_uint16(val, &job_msg.rotate)) {
+ error ("Invalid wait-for-switch value: %s", val);
+ exit_code = 1;
+ return 0;
+ }
update_cnt++;
}
else if (strncasecmp(tag, "Conn-Type", MAX(taglen, 2)) == 0) {
diff --git a/src/scontrol/update_node.c b/src/scontrol/update_node.c
index 0879b67..cfc16e6 100644
--- a/src/scontrol/update_node.c
+++ b/src/scontrol/update_node.c
@@ -72,9 +72,17 @@
error("Invalid input: %s Request aborted", argv[i]);
return -1;
}
- if (strncasecmp(tag, "NodeName", MAX(tag_len, 1)) == 0)
+
+ if (strncasecmp(tag, "NodeAddr", MAX(tag_len, 5)) == 0) {
+ node_msg.node_addr = val;
+ update_cnt++;
+ } else if (strncasecmp(tag, "NodeHostName", MAX(tag_len, 5))
+ == 0) {
+ node_msg.node_hostname = val;
+ update_cnt++;
+ } else if (strncasecmp(tag, "NodeName", MAX(tag_len, 1)) == 0) {
node_msg.node_names = val;
- else if (strncasecmp(tag, "Features", MAX(tag_len, 1)) == 0) {
+ } else if (strncasecmp(tag, "Features", MAX(tag_len, 1)) == 0) {
node_msg.features = val;
update_cnt++;
} else if (strncasecmp(tag, "Gres", MAX(tag_len, 1)) == 0) {
@@ -156,6 +164,10 @@
MAX(val_len, 3)) == 0) {
node_msg.node_state = NODE_STATE_FAIL;
update_cnt++;
+ } else if (strncasecmp(val, "FUTURE",
+ MAX(val_len, 3)) == 0) {
+ node_msg.node_state = NODE_STATE_FUTURE;
+ update_cnt++;
} else if (strncasecmp(val, "RESUME",
MAX(val_len, 3)) == 0) {
node_msg.node_state = NODE_RESUME;
diff --git a/src/scontrol/update_part.c b/src/scontrol/update_part.c
index 88188ae..a631ae1 100644
--- a/src/scontrol/update_part.c
+++ b/src/scontrol/update_part.c
@@ -112,8 +112,7 @@
}
else if (strncasecmp(tag, "MinNodes", MAX(taglen, 2)) == 0) {
min = 1;
- get_resource_arg_range(val,
- "MinNodes", &min, &max, true);
+ verify_node_count(val, &min, &max);
part_msg_ptr->min_nodes = min;
(*update_cnt_ptr)++;
}
@@ -159,7 +158,7 @@
}
(*update_cnt_ptr)++;
}
- else if (strncasecmp(tag, "RootOnly", MAX(taglen, 1)) == 0) {
+ else if (strncasecmp(tag, "RootOnly", MAX(taglen, 3)) == 0) {
if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
part_msg_ptr->flags |= PART_FLAG_ROOT_ONLY_CLR;
else if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0)
@@ -173,6 +172,20 @@
}
(*update_cnt_ptr)++;
}
+ else if (strncasecmp(tag, "ReqResv", MAX(taglen, 3)) == 0) {
+ if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
+ part_msg_ptr->flags |= PART_FLAG_REQ_RESV_CLR;
+ else if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0)
+ part_msg_ptr->flags |= PART_FLAG_REQ_RESV;
+ else {
+ exit_code = 1;
+ error("Invalid input: %s", argv[i]);
+ error("Acceptable ReqResv values "
+ "are YES and NO");
+ return -1;
+ }
+ (*update_cnt_ptr)++;
+ }
else if (strncasecmp(tag, "Shared", MAX(taglen, 2)) == 0) {
char *colon_pos = strchr(val, ':');
if (colon_pos) {
@@ -226,8 +239,10 @@
(*update_cnt_ptr)++;
}
else if (strncasecmp(tag, "Priority", MAX(taglen, 3)) == 0) {
- part_msg_ptr->priority = (uint16_t) strtol(val,
- (char **) NULL, 10);
+ if (parse_uint16(val, &part_msg_ptr->priority)) {
+ error ("Invalid Priority value: %s", val);
+ return -1;
+ }
(*update_cnt_ptr)++;
}
else if (strncasecmp(tag, "State", MAX(taglen, 2)) == 0) {
@@ -265,33 +280,44 @@
(*update_cnt_ptr)++;
}
else if (strncasecmp(tag, "GraceTime", MAX(taglen, 5)) == 0) {
- part_msg_ptr->grace_time = slurm_atoul(val);
+ if (parse_uint32(val, &part_msg_ptr->grace_time)) {
+ error ("Invalid GraceTime value: %s", val);
+ return -1;
+ }
(*update_cnt_ptr)++;
}
else if (strncasecmp(tag, "DefMemPerCPU",
MAX(taglen, 10)) == 0) {
- part_msg_ptr->def_mem_per_cpu = (uint32_t) strtol(val,
- (char **) NULL, 10);
+ if (parse_uint32(val, &part_msg_ptr->def_mem_per_cpu)) {
+ error ("Invalid DefMemPerCPU value: %s", val);
+ return -1;
+ }
part_msg_ptr->def_mem_per_cpu |= MEM_PER_CPU;
(*update_cnt_ptr)++;
}
else if (strncasecmp(tag, "DefMemPerNode",
MAX(taglen, 10)) == 0) {
- part_msg_ptr->def_mem_per_cpu = (uint32_t) strtol(val,
- (char **) NULL, 10);
+ if (parse_uint32(val, &part_msg_ptr->def_mem_per_cpu)) {
+ error ("Invalid DefMemPerNode value: %s", val);
+ return -1;
+ }
(*update_cnt_ptr)++;
}
else if (strncasecmp(tag, "MaxMemPerCPU",
MAX(taglen, 10)) == 0) {
- part_msg_ptr->max_mem_per_cpu = (uint32_t) strtol(val,
- (char **) NULL, 10);
+ if (parse_uint32(val, &part_msg_ptr->max_mem_per_cpu)) {
+ error ("Invalid MaxMemPerCPU value: %s", val);
+ return -1;
+ }
part_msg_ptr->max_mem_per_cpu |= MEM_PER_CPU;
(*update_cnt_ptr)++;
}
else if (strncasecmp(tag, "MaxMemPerNode",
MAX(taglen, 10)) == 0) {
- part_msg_ptr->max_mem_per_cpu = (uint32_t) strtol(val,
- (char **) NULL, 10);
+ if (parse_uint32(val, &part_msg_ptr->max_mem_per_cpu)) {
+ error ("Invalid MaxMemPerNode value: %s", val);
+ return -1;
+ }
(*update_cnt_ptr)++;
}
else {
diff --git a/src/sdiag/Makefile.am b/src/sdiag/Makefile.am
new file mode 100644
index 0000000..2f111e8
--- /dev/null
+++ b/src/sdiag/Makefile.am
@@ -0,0 +1,18 @@
+#
+# Makefile for sdiag
+
+AUTOMAKE_OPTIONS = foreign
+
+INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
+bin_PROGRAMS = sdiag
+
+sdiag_LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
+
+sdiag_SOURCES = sdiag.c opts.c
+
+force:
+$(sdiag_LDADD) : force
+ @cd `dirname $@` && $(MAKE) `basename $@`
+
+sdiag_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
+
diff --git a/src/sdiag/Makefile.in b/src/sdiag/Makefile.in
new file mode 100644
index 0000000..9268b48
--- /dev/null
+++ b/src/sdiag/Makefile.in
@@ -0,0 +1,653 @@
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for sdiag
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+bin_PROGRAMS = sdiag$(EXEEXT)
+subdir = src/sdiag
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
+ $(top_srcdir)/auxdir/libtool.m4 \
+ $(top_srcdir)/auxdir/ltoptions.m4 \
+ $(top_srcdir)/auxdir/ltsugar.m4 \
+ $(top_srcdir)/auxdir/ltversion.m4 \
+ $(top_srcdir)/auxdir/lt~obsolete.m4 \
+ $(top_srcdir)/auxdir/slurm.m4 \
+ $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \
+ $(top_srcdir)/auxdir/x_ac_affinity.m4 \
+ $(top_srcdir)/auxdir/x_ac_aix.m4 \
+ $(top_srcdir)/auxdir/x_ac_blcr.m4 \
+ $(top_srcdir)/auxdir/x_ac_bluegene.m4 \
+ $(top_srcdir)/auxdir/x_ac_cflags.m4 \
+ $(top_srcdir)/auxdir/x_ac_cray.m4 \
+ $(top_srcdir)/auxdir/x_ac_databases.m4 \
+ $(top_srcdir)/auxdir/x_ac_debug.m4 \
+ $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \
+ $(top_srcdir)/auxdir/x_ac_elan.m4 \
+ $(top_srcdir)/auxdir/x_ac_env.m4 \
+ $(top_srcdir)/auxdir/x_ac_federation.m4 \
+ $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \
+ $(top_srcdir)/auxdir/x_ac_hwloc.m4 \
+ $(top_srcdir)/auxdir/x_ac_iso.m4 \
+ $(top_srcdir)/auxdir/x_ac_lua.m4 \
+ $(top_srcdir)/auxdir/x_ac_man2html.m4 \
+ $(top_srcdir)/auxdir/x_ac_munge.m4 \
+ $(top_srcdir)/auxdir/x_ac_ncurses.m4 \
+ $(top_srcdir)/auxdir/x_ac_pam.m4 \
+ $(top_srcdir)/auxdir/x_ac_printf_null.m4 \
+ $(top_srcdir)/auxdir/x_ac_ptrace.m4 \
+ $(top_srcdir)/auxdir/x_ac_readline.m4 \
+ $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \
+ $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
+ $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \
+ $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \
+ $(top_srcdir)/auxdir/x_ac_srun.m4 \
+ $(top_srcdir)/auxdir/x_ac_sun_const.m4 \
+ $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am_sdiag_OBJECTS = sdiag.$(OBJEXT) opts.$(OBJEXT)
+sdiag_OBJECTS = $(am_sdiag_OBJECTS)
+am__DEPENDENCIES_1 =
+sdiag_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o \
+ $(am__DEPENDENCIES_1)
+sdiag_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sdiag_LDFLAGS) \
+ $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm
+depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(sdiag_SOURCES)
+DIST_SOURCES = $(sdiag_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTHD_CFLAGS = @AUTHD_CFLAGS@
+AUTHD_LIBS = @AUTHD_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BGL_LOADED = @BGL_LOADED@
+BGQ_LOADED = @BGQ_LOADED@
+BG_INCLUDES = @BG_INCLUDES@
+BG_LDFLAGS = @BG_LDFLAGS@
+BG_L_P_LOADED = @BG_L_P_LOADED@
+BLCR_CPPFLAGS = @BLCR_CPPFLAGS@
+BLCR_HOME = @BLCR_HOME@
+BLCR_LDFLAGS = @BLCR_LDFLAGS@
+BLCR_LIBS = @BLCR_LIBS@
+BLUEGENE_LOADED = @BLUEGENE_LOADED@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CMD_LDFLAGS = @CMD_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DL_LIBS = @DL_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ELAN_LIBS = @ELAN_LIBS@
+EXEEXT = @EXEEXT@
+FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@
+FGREP = @FGREP@
+GREP = @GREP@
+GTK_CFLAGS = @GTK_CFLAGS@
+GTK_LIBS = @GTK_LIBS@
+HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@
+HAVEPGCONFIG = @HAVEPGCONFIG@
+HAVE_AIX = @HAVE_AIX@
+HAVE_ELAN = @HAVE_ELAN@
+HAVE_FEDERATION = @HAVE_FEDERATION@
+HAVE_MAN2HTML = @HAVE_MAN2HTML@
+HAVE_OPENSSL = @HAVE_OPENSSL@
+HAVE_SOME_CURSES = @HAVE_SOME_CURSES@
+HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@
+HWLOC_LDFLAGS = @HWLOC_LDFLAGS@
+HWLOC_LIBS = @HWLOC_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_LDFLAGS = @LIB_LDFLAGS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
+MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
+MUNGE_LIBS = @MUNGE_LIBS@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_LIBS = @MYSQL_LIBS@
+NCURSES = @NCURSES@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NUMA_LIBS = @NUMA_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PAM_DIR = @PAM_DIR@
+PAM_LIBS = @PAM_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PROCTRACKDIR = @PROCTRACKDIR@
+PROJECT = @PROJECT@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+READLINE_LIBS = @READLINE_LIBS@
+REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
+RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
+SED = @SED@
+SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SLURMCTLD_PORT = @SLURMCTLD_PORT@
+SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@
+SLURMDBD_PORT = @SLURMDBD_PORT@
+SLURMD_PORT = @SLURMD_PORT@
+SLURM_API_AGE = @SLURM_API_AGE@
+SLURM_API_CURRENT = @SLURM_API_CURRENT@
+SLURM_API_MAJOR = @SLURM_API_MAJOR@
+SLURM_API_REVISION = @SLURM_API_REVISION@
+SLURM_API_VERSION = @SLURM_API_VERSION@
+SLURM_MAJOR = @SLURM_MAJOR@
+SLURM_MICRO = @SLURM_MICRO@
+SLURM_MINOR = @SLURM_MINOR@
+SLURM_PREFIX = @SLURM_PREFIX@
+SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@
+SLURM_VERSION_STRING = @SLURM_VERSION_STRING@
+SO_LDFLAGS = @SO_LDFLAGS@
+SSL_CPPFLAGS = @SSL_CPPFLAGS@
+SSL_LDFLAGS = @SSL_LDFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+UTIL_LIBS = @UTIL_LIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+ac_have_man2html = @ac_have_man2html@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lua_CFLAGS = @lua_CFLAGS@
+lua_LIBS = @lua_LIBS@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AUTOMAKE_OPTIONS = foreign
+INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
+sdiag_LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
+sdiag_SOURCES = sdiag.c opts.c
+sdiag_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/sdiag/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/sdiag/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p || test -f $$p1; \
+ then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+sdiag$(EXEEXT): $(sdiag_OBJECTS) $(sdiag_DEPENDENCIES) $(EXTRA_sdiag_DEPENDENCIES)
+ @rm -f sdiag$(EXEEXT)
+ $(sdiag_LINK) $(sdiag_OBJECTS) $(sdiag_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opts.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sdiag.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+ for dir in "$(DESTDIR)$(bindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+ clean-generic clean-libtool ctags distclean distclean-compile \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-binPROGRAMS install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am \
+ uninstall-binPROGRAMS
+
+
+force:
+$(sdiag_LDADD) : force
+ @cd `dirname $@` && $(MAKE) `basename $@`
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/sdiag/opts.c b/src/sdiag/opts.c
new file mode 100644
index 0000000..035d536
--- /dev/null
+++ b/src/sdiag/opts.c
@@ -0,0 +1,121 @@
+/****************************************************************************\
+ * opts.c - functions for processing sdiag parameters
+ *****************************************************************************
+ * Produced at Barcelona Supercomputing Center, December 2011
+ * Written by Alejandro Lucero <alucero@bsc.es>
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#if HAVE_GETOPT_H
+# include <getopt.h>
+#else
+# include "src/common/getopt.h"
+#endif
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "src/common/xstring.h"
+#include "src/common/proc_args.h"
+
+#define OPT_LONG_USAGE 0x101
+
+static void _help( void );
+static void _usage( void );
+
+extern int sdiag_param;
+
+/*
+ * parse_command_line, fill in params data structure with data
+ */
+extern void parse_command_line(int argc, char *argv[])
+{
+ int opt_char;
+ int option_index;
+ static struct option long_options[] = {
+ {"all", no_argument, 0, 'a'},
+ {"help", no_argument, 0, 'h'},
+ {"reset", no_argument, 0, 'r'},
+ {"usage", no_argument, 0, OPT_LONG_USAGE},
+ {"version", no_argument, 0, 'V'},
+ {NULL, 0, 0, 0}
+ };
+
+ while ((opt_char = getopt_long(argc, argv, "ahrV", long_options,
+ &option_index)) != -1) {
+ switch (opt_char) {
+ case (int)'a':
+ sdiag_param = STAT_COMMAND_GET;
+ break;
+ case (int)'h':
+ _help();
+ exit(0);
+ break;
+ case (int)'r':
+ sdiag_param = STAT_COMMAND_RESET;
+ break;
+ case (int) 'V':
+ print_slurm_version();
+ exit(0);
+ break;
+ case (int)OPT_LONG_USAGE:
+ _usage();
+ exit(0);
+ break;
+ }
+ }
+}
+
+
+static void _usage( void )
+{
+ printf("\nUsage: sdiag [-ar] \n");
+}
+
+static void _help( void )
+{
+ printf ("\
+Usage: sdiag [OPTIONS]\n\
+ --a all statistics\n\
+ --r reset statistics\n\
+\nHelp options:\n\
+ --help show this help message\n\
+ --usage display brief usage message\n\
+ --version display current version number\n");
+}
diff --git a/src/sdiag/sdiag.c b/src/sdiag/sdiag.c
new file mode 100644
index 0000000..f418b5f
--- /dev/null
+++ b/src/sdiag/sdiag.c
@@ -0,0 +1,169 @@
+/****************************************************************************\
+ * sdiag.c - Utility for getting information about slurmctld behaviour
+ *****************************************************************************
+ * Produced at Barcelona Supercomputing Center, December 2011
+ * Written by Alejandro Lucero <alucero@bsc.es>
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <slurm.h>
+#include "src/common/macros.h"
+#include "src/common/xstring.h"
+#include "src/common/slurm_protocol_defs.h"
+
+/********************
+ * Global Variables *
+ ********************/
+int sdiag_param = STAT_COMMAND_GET;
+
+stats_info_response_msg_t *buf;
+
+static int _get_info(void);
+static int _print_info(void);
+
+stats_info_request_msg_t req;
+
+extern void parse_command_line(int argc, char *argv[]);
+
+int main(int argc, char *argv[])
+{
+ int rc = 0;
+
+ parse_command_line(argc, argv);
+
+ if (sdiag_param == STAT_COMMAND_RESET) {
+ req.command_id = STAT_COMMAND_RESET;
+ rc = slurm_reset_statistics((stats_info_request_msg_t *)&req);
+ if (rc == SLURM_SUCCESS)
+ printf("Reset scheduling statistics\n");
+ else
+ slurm_perror("slurm_reset_statistics");
+ exit(rc);
+ } else {
+ rc = _get_info();
+ if (rc == SLURM_SUCCESS)
+ rc = _print_info();
+ }
+
+ exit(rc);
+}
+
+static int _get_info(void)
+{
+ int rc;
+
+ req.command_id = STAT_COMMAND_GET;
+ rc = slurm_get_statistics(&buf, (stats_info_request_msg_t *)&req);
+ if (rc != SLURM_SUCCESS)
+ slurm_perror("slurm_get_statistics");
+
+ return rc;
+}
+
+static int _print_info(void)
+{
+ if (!buf) {
+ printf("No data available. Probably slurmctld is not working\n");
+ return -1;
+ }
+
+ printf("*******************************************************\n");
+ printf("sdiag output at %s", ctime(&buf->req_time));
+ printf("Data since %s", ctime(&buf->req_time_start));
+ printf("*******************************************************\n");
+
+ printf("Server thread count: %d\n", buf->server_thread_count);
+ printf("Agent queue size: %d\n\n", buf->agent_queue_size);
+ printf("Jobs submitted: %d\n", buf->jobs_submitted);
+ printf("Jobs started: %d\n",
+ buf->jobs_started + buf->bf_last_backfilled_jobs);
+ printf("Jobs completed: %d\n", buf->jobs_completed);
+ printf("Jobs canceled: %d\n", buf->jobs_canceled);
+ printf("Jobs failed: %d\n", buf->jobs_failed);
+ printf("\nMain schedule statistics (microseconds):\n");
+ printf("\tLast cycle: %u\n", buf->schedule_cycle_last);
+ printf("\tMax cycle: %u\n", buf->schedule_cycle_max);
+ printf("\tTotal cycles: %u\n", buf->schedule_cycle_counter);
+ if (buf->schedule_cycle_counter > 0) {
+ printf("\tMean cycle: %u\n",
+ buf->schedule_cycle_sum / buf->schedule_cycle_counter);
+ printf("\tMean depth cycle: %u\n",
+ buf->schedule_cycle_depth / buf->schedule_cycle_counter);
+ }
+ if ((buf->req_time - buf->req_time_start) > 60) {
+ printf("\tCycles per minute: %u\n",
+ (uint32_t) (buf->schedule_cycle_counter /
+ ((buf->req_time - buf->req_time_start) / 60)));
+ }
+ printf("\tLast queue length: %u\n", buf->schedule_queue_len);
+
+ if (buf->bf_active) {
+ printf("\nBackfilling stats (WARNING: data obtained"
+ " in the middle of backfilling execution\n");
+ } else
+ printf("\nBackfilling stats\n");
+
+ printf("\tTotal backfilled jobs (since last slurm start): %u\n",
+ buf->bf_backfilled_jobs);
+ printf("\tTotal backfilled jobs (since last stats cycle start): %u\n",
+ buf->bf_last_backfilled_jobs);
+ printf("\tTotal cycles: %u\n", buf->bf_cycle_counter);
+ printf("\tLast cycle when: %s", ctime(&buf->bf_when_last_cycle));
+ printf("\tLast cycle: %u\n", buf->bf_cycle_last);
+ printf("\tMax cycle: %u\n", buf->bf_cycle_max);
+ if (buf->bf_cycle_counter > 0) {
+ printf("\tMean cycle: %u\n",
+ buf->bf_cycle_sum / buf->bf_cycle_counter);
+ }
+ printf("\tLast depth cycle: %u\n", buf->bf_last_depth);
+ printf("\tLast depth cycle (try sched): %u\n", buf->bf_last_depth_try);
+ if (buf->bf_cycle_counter > 0) {
+ printf("\tDepth Mean: %u\n",
+ buf->bf_depth_sum / buf->bf_cycle_counter);
+ printf("\tDepth Mean (try depth): %u\n",
+ buf->bf_depth_try_sum / buf->bf_cycle_counter);
+ }
+ printf("\tLast queue length: %u\n", buf->bf_queue_len);
+ if (buf->bf_cycle_counter > 0) {
+ printf("\tQueue length mean: %u\n",
+ buf->bf_queue_len_sum / buf->bf_cycle_counter);
+ }
+ return 0;
+}
+
diff --git a/src/sinfo/Makefile.in b/src/sinfo/Makefile.in
index 2701539..07da834 100644
--- a/src/sinfo/Makefile.in
+++ b/src/sinfo/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -195,6 +195,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -231,6 +232,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -401,7 +403,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sinfo$(EXEEXT): $(sinfo_OBJECTS) $(sinfo_DEPENDENCIES)
+sinfo$(EXEEXT): $(sinfo_OBJECTS) $(sinfo_DEPENDENCIES) $(EXTRA_sinfo_DEPENDENCIES)
@rm -f sinfo$(EXEEXT)
$(sinfo_LINK) $(sinfo_OBJECTS) $(sinfo_LDADD) $(LIBS)
@@ -542,10 +544,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/sinfo/opts.c b/src/sinfo/opts.c
index b24810e..4dc0c70 100644
--- a/src/sinfo/opts.c
+++ b/src/sinfo/opts.c
@@ -269,13 +269,13 @@
} else if ( params.node_flag ) {
params.node_field_flag = true; /* compute size later */
params.format = params.long_output ?
- "%N %.6D %.9P %.11T %.4c %.8z %.6m %.8d %.6w %.8f %20R" :
+ "%N %.6D %.9P %.11T %.4c %.8z %.6m %.8d %.6w %.8f %20E" :
"%N %.6D %.9P %6t";
} else if (params.list_reasons) {
params.format = params.long_output ?
- "%20R %12U %19H %6t %N" :
- "%20R %9u %19H %N";
+ "%20E %12U %19H %6t %N" :
+ "%20E %9u %19H %N";
} else if ((env_val = getenv ("SINFO_FORMAT"))) {
params.format = xstrdup(env_val);
@@ -533,9 +533,13 @@
field_size,
right_justify,
suffix );
- }
-/* else if (field[0] == 'E') see 'R' below */
- else if (field[0] == 'f') {
+ } else if (field[0] == 'E') {
+ params.match_flags.reason_flag = true;
+ format_add_reason( params.format_list,
+ field_size,
+ right_justify,
+ suffix );
+ } else if (field[0] == 'f') {
params.match_flags.features_flag = true;
format_add_features( params.format_list,
field_size,
@@ -629,9 +633,9 @@
field_size,
right_justify,
suffix );
- } else if ((field[0] == 'E') || (field[0] == 'R')) {
- params.match_flags.reason_flag = true;
- format_add_reason( params.format_list,
+ } else if (field[0] == 'R') {
+ params.match_flags.partition_flag = true;
+ format_add_partition_name( params.format_list,
field_size,
right_justify,
suffix );
diff --git a/src/sinfo/print.c b/src/sinfo/print.c
index 0902cc3..c7b2af2 100644
--- a/src/sinfo/print.c
+++ b/src/sinfo/print.c
@@ -584,7 +584,7 @@
} else {
char *title = "NODELIST";
if(params.cluster_flags & CLUSTER_FLAG_BG)
- title = "BP_LIST";
+ title = "MIDPLANELIST";
_print_str(title, width, right_justify, false);
}
@@ -716,7 +716,7 @@
char *tmp;
tmp = xstrdup(sinfo_data->part_info->name);
if (sinfo_data->part_info->flags & PART_FLAG_DEFAULT) {
- if (strlen(tmp) < width)
+ if ( (strlen(tmp) < width) || (width == 0) )
xstrcat(tmp, "*");
else if (width > 0)
tmp[width-1] = '*';
@@ -732,6 +732,24 @@
return SLURM_SUCCESS;
}
+int _print_partition_name(sinfo_data_t * sinfo_data, int width,
+ bool right_justify, char *suffix)
+{
+ if (sinfo_data) {
+ if (sinfo_data->part_info == NULL)
+ _print_str("n/a", width, right_justify, true);
+ else {
+ _print_str(sinfo_data->part_info->name, width,
+ right_justify, true);
+ }
+ } else
+ _print_str("PARTITION", width, right_justify, true);
+
+ if (suffix)
+ printf("%s", suffix);
+ return SLURM_SUCCESS;
+}
+
int _print_prefix(sinfo_data_t * job, int width, bool right_justify,
char* suffix)
{
diff --git a/src/sinfo/print.h b/src/sinfo/print.h
index 118a1e0..230c991 100644
--- a/src/sinfo/print.h
+++ b/src/sinfo/print.h
@@ -107,6 +107,8 @@
format_add_function(list,wid,right,suffix,_print_nodes_ai)
#define format_add_partition(list,wid,right,suffix) \
format_add_function(list,wid,right,suffix,_print_partition)
+#define format_add_partition_name(list,wid,right,suffix) \
+ format_add_function(list,wid,right,suffix,_print_partition_name)
#define format_add_prefix(list,wid,right,suffix) \
format_add_function(list,wid,right,suffix,_print_prefix)
#define format_add_preempt_mode(list,wid,right,suffix) \
@@ -184,6 +186,8 @@
bool right_justify, char *suffix);
int _print_partition(sinfo_data_t * sinfo_data, int width,
bool right_justify, char *suffix);
+int _print_partition_name(sinfo_data_t * sinfo_data, int width,
+ bool right_justify, char *suffix);
int _print_prefix(sinfo_data_t * sinfo_data, int width,
bool right_justify, char *suffix);
int _print_preempt_mode(sinfo_data_t * sinfo_data, int width,
diff --git a/src/sinfo/sinfo.c b/src/sinfo/sinfo.c
index 3369465..299c5ab 100644
--- a/src/sinfo/sinfo.c
+++ b/src/sinfo/sinfo.c
@@ -179,17 +179,16 @@
}
if (!params.no_header)
- printf("BG_BLOCK MIDPLANES OWNER STATE CONNECTION USE\n");
-/* 1234567890123456 123456789012 12345678 12345678 1234567890 12345+ */
-/* RMP_22Apr1544018 bg[123x456] name READY TORUS COPROCESSOR */
+ printf("BG_BLOCK MIDPLANES STATE CONNECTION USE\n");
+/* 1234567890123456 123456789012 12345678 1234567890 12345+ */
+/* RMP_22Apr1544018 bg[123x456] READY TORUS COPROCESSOR */
for (i=0; i<block_ptr->record_count; i++) {
char *conn_str = conn_type_string_full(
block_ptr->block_array[i].conn_type);
- printf("%-16.16s %-15.15s %-8.8s %-8.8s %-10.10s %s\n",
+ printf("%-16.16s %-15.15s %-8.8s %-10.10s %s\n",
block_ptr->block_array[i].bg_block_id,
block_ptr->block_array[i].mp_str,
- block_ptr->block_array[i].owner_name,
bg_block_state_string(
block_ptr->block_array[i].state),
conn_str,
diff --git a/src/slurmctld/Makefile.am b/src/slurmctld/Makefile.am
index 770223a..67b4631 100644
--- a/src/slurmctld/Makefile.am
+++ b/src/slurmctld/Makefile.am
@@ -55,6 +55,7 @@
srun_comm.h \
state_save.c \
state_save.h \
+ statistics.c \
step_mgr.c \
trigger_mgr.c \
trigger_mgr.h
diff --git a/src/slurmctld/Makefile.in b/src/slurmctld/Makefile.in
index 1951ab9..f2f90a7 100644
--- a/src/slurmctld/Makefile.in
+++ b/src/slurmctld/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -97,8 +97,8 @@
ping_nodes.$(OBJEXT) port_mgr.$(OBJEXT) power_save.$(OBJEXT) \
preempt.$(OBJEXT) proc_req.$(OBJEXT) read_config.$(OBJEXT) \
reservation.$(OBJEXT) sched_plugin.$(OBJEXT) \
- srun_comm.$(OBJEXT) state_save.$(OBJEXT) step_mgr.$(OBJEXT) \
- trigger_mgr.$(OBJEXT)
+ srun_comm.$(OBJEXT) state_save.$(OBJEXT) statistics.$(OBJEXT) \
+ step_mgr.$(OBJEXT) trigger_mgr.$(OBJEXT)
slurmctld_OBJECTS = $(am_slurmctld_OBJECTS)
am__DEPENDENCIES_1 =
slurmctld_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \
@@ -201,6 +201,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -237,6 +238,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -377,6 +379,7 @@
srun_comm.h \
state_save.c \
state_save.h \
+ statistics.c \
step_mgr.c \
trigger_mgr.c \
trigger_mgr.h
@@ -463,7 +466,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-slurmctld$(EXEEXT): $(slurmctld_OBJECTS) $(slurmctld_DEPENDENCIES)
+slurmctld$(EXEEXT): $(slurmctld_OBJECTS) $(slurmctld_DEPENDENCIES) $(EXTRA_slurmctld_DEPENDENCIES)
@rm -f slurmctld$(EXEEXT)
$(slurmctld_LINK) $(slurmctld_OBJECTS) $(slurmctld_LDADD) $(LIBS)
@@ -498,6 +501,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sched_plugin.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srun_comm.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/state_save.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/statistics.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/step_mgr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trigger_mgr.Po@am__quote@
@@ -627,10 +631,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/slurmctld/acct_policy.c b/src/slurmctld/acct_policy.c
index 2bd0185..cb5df1d 100644
--- a/src/slurmctld/acct_policy.c
+++ b/src/slurmctld/acct_policy.c
@@ -75,19 +75,6 @@
return used_limits;
}
-static void _cancel_job(struct job_record *job_ptr)
-{
- time_t now = time(NULL);
-
- last_job_update = now;
- job_ptr->job_state = JOB_FAILED;
- job_ptr->exit_code = 1;
- job_ptr->state_reason = FAIL_ACCOUNT;
- xfree(job_ptr->state_desc);
- job_ptr->start_time = job_ptr->end_time = now;
- job_completion_logger(job_ptr, false);
- delete_job_details(job_ptr);
-}
static uint64_t _get_unused_cpu_run_secs(struct job_record *job_ptr)
{
@@ -114,12 +101,13 @@
(assoc_ptr->uid != job_ptr->user_id)) {
error("Invalid assoc_ptr for jobid=%u", job_ptr->job_id);
memset(&assoc_rec, 0, sizeof(slurmdb_association_rec_t));
- if(job_ptr->assoc_id)
+ if (job_ptr->assoc_id)
assoc_rec.id = job_ptr->assoc_id;
else {
- assoc_rec.uid = job_ptr->user_id;
- assoc_rec.partition = job_ptr->partition;
assoc_rec.acct = job_ptr->account;
+ if (job_ptr->part_ptr)
+ assoc_rec.partition = job_ptr->part_ptr->name;
+ assoc_rec.uid = job_ptr->user_id;
}
if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
accounting_enforce,
@@ -142,6 +130,7 @@
WRITE_LOCK, NO_LOCK, NO_LOCK };
uint64_t unused_cpu_run_secs = 0;
uint64_t used_cpu_run_secs = 0;
+ uint32_t job_memory = 0;
if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS)
|| !_valid_job_assoc(job_ptr))
@@ -153,6 +142,23 @@
used_cpu_run_secs = (uint64_t)job_ptr->total_cpus
* (uint64_t)job_ptr->time_limit * 60;
+ if (job_ptr->details && job_ptr->details->pn_min_memory) {
+ if (job_ptr->details->pn_min_memory & MEM_PER_CPU) {
+ job_memory = (job_ptr->details->pn_min_memory
+ & (~MEM_PER_CPU))
+ * job_ptr->total_cpus;
+ debug2("_adjust_limit_usage: job %u: MPC: "
+ "job_memory set to %u", job_ptr->job_id,
+ job_memory);
+ } else {
+ job_memory = (job_ptr->details->pn_min_memory)
+ * job_ptr->node_cnt;
+ debug2("_adjust_limit_usage: job %u: MPN: "
+ "job_memory set to %u", job_ptr->job_id,
+ job_memory);
+ }
+ }
+
assoc_mgr_lock(&locks);
if (job_ptr->qos_ptr && (accounting_enforce & ACCOUNTING_ENFORCE_QOS)) {
slurmdb_qos_rec_t *qos_ptr = NULL;
@@ -195,6 +201,7 @@
case ACCT_POLICY_JOB_BEGIN:
qos_ptr->usage->grp_used_jobs++;
qos_ptr->usage->grp_used_cpus += job_ptr->total_cpus;
+ qos_ptr->usage->grp_used_mem += job_memory;
qos_ptr->usage->grp_used_nodes += job_ptr->node_cnt;
qos_ptr->usage->grp_used_cpu_run_secs +=
used_cpu_run_secs;
@@ -218,6 +225,13 @@
"underflow for qos %s", qos_ptr->name);
}
+ qos_ptr->usage->grp_used_mem -= job_memory;
+ if((int32_t)qos_ptr->usage->grp_used_mem < 0) {
+ qos_ptr->usage->grp_used_mem = 0;
+ debug2("acct_policy_job_fini: grp_used_mem "
+ "underflow for qos %s", qos_ptr->name);
+ }
+
qos_ptr->usage->grp_used_nodes -= job_ptr->node_cnt;
if ((int32_t)qos_ptr->usage->grp_used_nodes < 0) {
qos_ptr->usage->grp_used_nodes = 0;
@@ -230,7 +244,7 @@
if (unused_cpu_run_secs >
qos_ptr->usage->grp_used_cpu_run_secs) {
qos_ptr->usage->grp_used_cpu_run_secs = 0;
- info("acct_policy_job_fini: "
+ debug2("acct_policy_job_fini: "
"grp_used_cpu_run_secs "
"underflow for qos %s", qos_ptr->name);
} else
@@ -288,6 +302,7 @@
case ACCT_POLICY_JOB_BEGIN:
assoc_ptr->usage->used_jobs++;
assoc_ptr->usage->grp_used_cpus += job_ptr->total_cpus;
+ assoc_ptr->usage->grp_used_mem += job_memory;
assoc_ptr->usage->grp_used_nodes += job_ptr->node_cnt;
assoc_ptr->usage->grp_used_cpu_run_secs +=
used_cpu_run_secs;
@@ -312,6 +327,14 @@
assoc_ptr->acct);
}
+ assoc_ptr->usage->grp_used_mem -= job_memory;
+ if ((int32_t)assoc_ptr->usage->grp_used_mem < 0) {
+ assoc_ptr->usage->grp_used_mem = 0;
+ debug2("acct_policy_job_fini: grp_used_mem "
+ "underflow for account %s",
+ assoc_ptr->acct);
+ }
+
assoc_ptr->usage->grp_used_nodes -= job_ptr->node_cnt;
if ((int32_t)assoc_ptr->usage->grp_used_nodes < 0) {
assoc_ptr->usage->grp_used_nodes = 0;
@@ -393,9 +416,9 @@
struct part_record *part_ptr,
slurmdb_association_rec_t *assoc_in,
slurmdb_qos_rec_t *qos_ptr,
- uint16_t *limit_set_max_cpus,
- uint16_t *limit_set_max_nodes,
- uint16_t *limit_set_time, bool update_call)
+ uint16_t *reason,
+ acct_policy_limit_set_t *acct_policy_limit_set,
+ bool update_call)
{
uint32_t time_limit;
slurmdb_association_rec_t *assoc_ptr = assoc_in;
@@ -404,63 +427,92 @@
bool rc = true;
uint32_t qos_max_cpus_limit = INFINITE;
uint32_t qos_max_nodes_limit = INFINITE;
+ uint32_t job_memory = 0;
+ bool admin_set_memory_limit = false;
assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK,
READ_LOCK, NO_LOCK, NO_LOCK };
- xassert(limit_set_max_cpus);
- xassert(limit_set_max_nodes);
- xassert(limit_set_time);
+ xassert(acct_policy_limit_set);
if (!assoc_ptr) {
error("_validate_acct_policy: no assoc_ptr given for job.");
return false;
}
-
user_name = assoc_ptr->user;
+ if (job_desc->pn_min_memory != NO_VAL) {
+ if ((job_desc->pn_min_memory & MEM_PER_CPU)
+ && (job_desc->min_cpus != NO_VAL)) {
+ job_memory = (job_desc->pn_min_memory & (~MEM_PER_CPU))
+ * job_desc->min_cpus;
+ admin_set_memory_limit =
+ (acct_policy_limit_set->pn_min_memory
+ == ADMIN_SET_LIMIT)
+ || (acct_policy_limit_set->max_cpus
+ == ADMIN_SET_LIMIT);
+ debug3("acct_policy_validate: MPC: "
+ "job_memory set to %u", job_memory);
+ } else if (job_desc->min_nodes != NO_VAL) {
+ job_memory = (job_desc->pn_min_memory)
+ * job_desc->min_nodes;
+ admin_set_memory_limit =
+ (acct_policy_limit_set->pn_min_memory
+ == ADMIN_SET_LIMIT)
+ || (acct_policy_limit_set->max_nodes
+ == ADMIN_SET_LIMIT);
+ debug3("acct_policy_validate: MPN: "
+ "job_memory set to %u", job_memory);
+ }
+ }
+
assoc_mgr_lock(&locks);
+
if (qos_ptr) {
slurmdb_used_limits_t *used_limits = NULL;
-
/* for validation we don't need to look at
* qos_ptr->grp_cpu_mins.
*/
qos_max_cpus_limit =
MIN(qos_ptr->grp_cpus, qos_ptr->max_cpus_pu);
- if (((*limit_set_max_cpus) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->max_cpus == ADMIN_SET_LIMIT)
|| (qos_max_cpus_limit == INFINITE)
|| (update_call && (job_desc->max_cpus == NO_VAL))) {
/* no need to check/set */
- } else if ((job_desc->min_cpus != NO_VAL)
+
+ } else if (reason && (job_desc->min_cpus != NO_VAL)
&& (job_desc->min_cpus > qos_ptr->max_cpus_pu)) {
- info("job submit for user %s(%u): "
- "min cpu request %u exceeds "
- "per-user max cpu limit %u for qos '%s'",
- user_name,
- job_desc->user_id,
- job_desc->min_cpus,
- qos_ptr->max_cpus_pu,
- qos_ptr->name);
+ *reason = WAIT_QOS_RESOURCE_LIMIT;
+
+ debug2("job submit for user %s(%u): "
+ "min cpu request %u exceeds "
+ "per-user max cpu limit %u for qos '%s'",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_cpus,
+ qos_ptr->max_cpus_pu,
+ qos_ptr->name);
rc = false;
goto end_it;
- } else if ((job_desc->min_cpus != NO_VAL)
+ } else if (reason && (job_desc->min_cpus != NO_VAL)
&& (job_desc->min_cpus > qos_ptr->grp_cpus)) {
- info("job submit for user %s(%u): "
- "min cpu request %u exceeds "
- "group max cpu limit %u for qos '%s'",
- user_name,
- job_desc->user_id,
- job_desc->min_cpus,
- qos_ptr->grp_cpus,
- qos_ptr->name);
+ *reason = WAIT_QOS_RESOURCE_LIMIT;
+ debug2("job submit for user %s(%u): "
+ "min cpu request %u exceeds "
+ "group max cpu limit %u for qos '%s'",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_cpus,
+ qos_ptr->grp_cpus,
+ qos_ptr->name);
rc = false;
goto end_it;
} else if ((job_desc->max_cpus == NO_VAL)
- || ((*limit_set_max_cpus)
+ || (acct_policy_limit_set->max_cpus
&& (job_desc->max_cpus > qos_max_cpus_limit))) {
job_desc->max_cpus = qos_max_cpus_limit;
- (*limit_set_max_cpus) = 1;
- } else if (job_desc->max_cpus > qos_max_cpus_limit) {
+ acct_policy_limit_set->max_cpus = 1;
+ } else if (reason && job_desc->max_cpus > qos_max_cpus_limit) {
+ *reason = WAIT_QOS_RESOURCE_LIMIT;
info("job submit for user %s(%u): "
"max cpu changed %u -> %u because "
"of qos limit",
@@ -469,51 +521,70 @@
job_desc->max_cpus,
qos_max_cpus_limit);
if (job_desc->max_cpus == NO_VAL)
- (*limit_set_max_cpus) = 1;
+ acct_policy_limit_set->max_cpus = 1;
job_desc->max_cpus = qos_max_cpus_limit;
}
/* for validation we don't need to look at
* qos_ptr->grp_jobs.
*/
+ if (!admin_set_memory_limit && reason
+ && (qos_ptr->grp_mem != INFINITE)
+ && (job_memory > qos_ptr->grp_mem)) {
+ *reason = WAIT_QOS_JOB_LIMIT;
+ debug2("job submit for user %s(%u): "
+ "min memory request %u exceeds "
+ "group max memory limit %u for qos '%s'",
+ user_name,
+ job_desc->user_id,
+ job_memory,
+ qos_ptr->grp_mem,
+ qos_ptr->name);
+ rc = false;
+ goto end_it;
+ }
qos_max_nodes_limit =
MIN(qos_ptr->grp_nodes, qos_ptr->max_nodes_pu);
- if (((*limit_set_max_nodes) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->max_nodes == ADMIN_SET_LIMIT)
|| (qos_max_nodes_limit == INFINITE)
|| (update_call && (job_desc->max_nodes == NO_VAL))) {
/* no need to check/set */
- } else if ((job_desc->min_nodes != NO_VAL)
+ } else if (reason && (job_desc->min_nodes != NO_VAL)
&& (job_desc->min_nodes > qos_ptr->max_nodes_pu)) {
- info("job submit for user %s(%u): "
- "min node request %u exceeds "
- "per-user max node limit %u for qos '%s'",
- user_name,
- job_desc->user_id,
- job_desc->min_nodes,
- qos_ptr->max_nodes_pu,
- qos_ptr->name);
+ *reason = WAIT_QOS_RESOURCE_LIMIT;
+ debug2("job submit for user %s(%u): "
+ "min node request %u exceeds "
+ "per-user max node limit %u for qos '%s'",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_nodes,
+ qos_ptr->max_nodes_pu,
+ qos_ptr->name);
rc = false;
goto end_it;
- } else if ((job_desc->min_nodes != NO_VAL)
+ } else if (reason && (job_desc->min_nodes != NO_VAL)
&& (job_desc->min_nodes > qos_ptr->grp_nodes)) {
- info("job submit for user %s(%u): "
- "min node request %u exceeds "
- "group max node limit %u for qos '%s'",
- user_name,
- job_desc->user_id,
- job_desc->min_nodes,
- qos_ptr->grp_nodes,
- qos_ptr->name);
+ *reason = WAIT_QOS_JOB_LIMIT;
+ debug2("job submit for user %s(%u): "
+ "min node request %u exceeds "
+ "group max node limit %u for qos '%s'",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_nodes,
+ qos_ptr->grp_nodes,
+ qos_ptr->name);
rc = false;
goto end_it;
} else if ((job_desc->max_nodes == 0)
- || ((*limit_set_max_nodes)
+ || (acct_policy_limit_set->max_nodes
&& (job_desc->max_nodes
> qos_max_nodes_limit))) {
job_desc->max_nodes = qos_max_nodes_limit;
- (*limit_set_max_nodes) = 1;
- } else if (job_desc->max_nodes > qos_max_nodes_limit) {
+ acct_policy_limit_set->max_nodes = 1;
+ } else if (reason
+ && job_desc->max_nodes > qos_max_nodes_limit) {
+ *reason = WAIT_QOS_JOB_LIMIT;
info("job submit for user %s(%u): "
"max node changed %u -> %u because "
"of qos limit",
@@ -522,20 +593,20 @@
job_desc->max_nodes,
qos_max_nodes_limit);
if (job_desc->max_nodes == NO_VAL)
- (*limit_set_max_nodes) = 1;
+ acct_policy_limit_set->max_nodes = 1;
job_desc->max_nodes = qos_max_nodes_limit;
}
if ((qos_ptr->grp_submit_jobs != INFINITE) &&
(qos_ptr->usage->grp_used_submit_jobs
>= qos_ptr->grp_submit_jobs)) {
- info("job submit for user %s(%u): "
- "group max submit job limit exceeded %u "
- "for qos '%s'",
- user_name,
- job_desc->user_id,
- qos_ptr->grp_submit_jobs,
- qos_ptr->name);
+ debug2("job submit for user %s(%u): "
+ "group max submit job limit exceeded %u "
+ "for qos '%s'",
+ user_name,
+ job_desc->user_id,
+ qos_ptr->grp_submit_jobs,
+ qos_ptr->name);
rc = false;
goto end_it;
}
@@ -551,28 +622,31 @@
* job is running.
*/
- if (((*limit_set_max_cpus) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->max_cpus == ADMIN_SET_LIMIT)
|| (qos_ptr->max_cpus_pj == INFINITE)
|| (update_call && (job_desc->max_cpus == NO_VAL))) {
/* no need to check/set */
- } else if ((job_desc->min_cpus != NO_VAL)
+ } else if (reason && (job_desc->min_cpus != NO_VAL)
&& (job_desc->min_cpus > qos_ptr->max_cpus_pj)) {
- info("job submit for user %s(%u): "
- "min cpu limit %u exceeds "
- "qos max %u",
- user_name,
- job_desc->user_id,
- job_desc->min_cpus,
- qos_ptr->max_cpus_pj);
+ *reason = WAIT_QOS_JOB_LIMIT;
+ debug2("job submit for user %s(%u): "
+ "min cpu limit %u exceeds "
+ "qos max %u",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_cpus,
+ qos_ptr->max_cpus_pj);
rc = false;
goto end_it;
} else if ((job_desc->max_cpus == NO_VAL)
- || ((*limit_set_max_cpus)
+ || (acct_policy_limit_set->max_cpus
&& (job_desc->max_cpus
> qos_ptr->max_cpus_pj))) {
job_desc->max_cpus = qos_ptr->max_cpus_pj;
- (*limit_set_max_cpus) = 1;
- } else if (job_desc->max_cpus > qos_ptr->max_cpus_pj) {
+ acct_policy_limit_set->max_cpus = 1;
+ } else if (reason
+ && job_desc->max_cpus > qos_ptr->max_cpus_pj) {
+ *reason = WAIT_QOS_JOB_LIMIT;
info("job submit for user %s(%u): "
"max cpu changed %u -> %u because "
"of qos limit",
@@ -581,7 +655,7 @@
job_desc->max_cpus,
qos_ptr->max_cpus_pj);
if (job_desc->max_cpus == NO_VAL)
- (*limit_set_max_cpus) = 1;
+ acct_policy_limit_set->max_cpus = 1;
job_desc->max_cpus = qos_ptr->max_cpus_pj;
}
@@ -589,28 +663,31 @@
* qos_ptr->max_jobs.
*/
- if (((*limit_set_max_nodes) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->max_nodes == ADMIN_SET_LIMIT)
|| (qos_ptr->max_nodes_pj == INFINITE)
|| (update_call && (job_desc->max_nodes == NO_VAL))) {
/* no need to check/set */
- } else if ((job_desc->min_nodes != NO_VAL)
+ } else if (reason && (job_desc->min_nodes != NO_VAL)
&& (job_desc->min_nodes > qos_ptr->max_nodes_pj)) {
- info("job submit for user %s(%u): "
- "min node limit %u exceeds "
- "qos max %u",
- user_name,
- job_desc->user_id,
- job_desc->min_nodes,
- qos_ptr->max_nodes_pj);
+ *reason = WAIT_QOS_JOB_LIMIT;
+ debug2("job submit for user %s(%u): "
+ "min node limit %u exceeds "
+ "qos max %u",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_nodes,
+ qos_ptr->max_nodes_pj);
rc = false;
goto end_it;
} else if ((job_desc->max_nodes == 0)
- || ((*limit_set_max_nodes)
+ || (acct_policy_limit_set->max_nodes
&& (job_desc->max_nodes
> qos_ptr->max_nodes_pj))) {
job_desc->max_nodes = qos_ptr->max_nodes_pj;
- (*limit_set_max_nodes) = 1;
- } else if (job_desc->max_nodes > qos_ptr->max_nodes_pj) {
+ acct_policy_limit_set->max_nodes = 1;
+ } else if (reason
+ && job_desc->max_nodes > qos_ptr->max_nodes_pj) {
+ *reason = WAIT_QOS_JOB_LIMIT;
info("job submit for user %s(%u): "
"max node changed %u -> %u because "
"of qos limit",
@@ -619,7 +696,7 @@
job_desc->max_nodes,
qos_ptr->max_nodes_pj);
if (job_desc->max_nodes == NO_VAL)
- (*limit_set_max_nodes) = 1;
+ acct_policy_limit_set->max_nodes = 1;
job_desc->max_nodes = qos_ptr->max_nodes_pj;
}
@@ -630,17 +707,17 @@
job_desc->user_id);
if (used_limits && (used_limits->submit_jobs
>= qos_ptr->max_submit_jobs_pu)) {
- info("job submit for user %s(%u): "
- "qos max submit job limit exceeded %u",
- user_name,
- job_desc->user_id,
- qos_ptr->max_submit_jobs_pu);
+ debug2("job submit for user %s(%u): "
+ "qos max submit job limit exceeded %u",
+ user_name,
+ job_desc->user_id,
+ qos_ptr->max_submit_jobs_pu);
rc = false;
goto end_it;
}
}
- if (((*limit_set_time) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->time == ADMIN_SET_LIMIT)
|| (qos_ptr->max_wall_pj == INFINITE)
|| (update_call && (job_desc->time_limit == NO_VAL))) {
/* no need to check/set */
@@ -653,16 +730,18 @@
job_desc->time_limit =
MIN(time_limit,
part_ptr->max_time);
- (*limit_set_time) = 1;
- } else if ((*limit_set_time) &&
+ acct_policy_limit_set->time = 1;
+ } else if (acct_policy_limit_set->time &&
job_desc->time_limit > time_limit) {
job_desc->time_limit = time_limit;
- } else if (job_desc->time_limit > time_limit) {
- info("job submit for user %s(%u): "
- "time limit %u exceeds qos max %u",
- user_name,
- job_desc->user_id,
- job_desc->time_limit, time_limit);
+ } else if (reason
+ && job_desc->time_limit > time_limit) {
+ *reason = WAIT_QOS_JOB_LIMIT;
+ debug2("job submit for user %s(%u): "
+ "time limit %u exceeds qos max %u",
+ user_name,
+ job_desc->user_id,
+ job_desc->time_limit, time_limit);
rc = false;
goto end_it;
}
@@ -670,33 +749,33 @@
}
- while(assoc_ptr) {
+ while (assoc_ptr) {
/* for validation we don't need to look at
* assoc_ptr->grp_cpu_mins.
*/
- if (((*limit_set_max_cpus) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->max_cpus == ADMIN_SET_LIMIT)
|| (qos_ptr && (qos_ptr->grp_cpus != INFINITE))
|| (assoc_ptr->grp_cpus == INFINITE)
|| (update_call && (job_desc->max_cpus == NO_VAL))) {
/* no need to check/set */
} else if ((job_desc->min_cpus != NO_VAL)
&& (job_desc->min_cpus > assoc_ptr->grp_cpus)) {
- info("job submit for user %s(%u): "
- "min cpu request %u exceeds "
- "group max cpu limit %u for account %s",
- user_name,
- job_desc->user_id,
- job_desc->min_cpus,
- assoc_ptr->grp_cpus,
- assoc_ptr->acct);
+ debug2("job submit for user %s(%u): "
+ "min cpu request %u exceeds "
+ "group max cpu limit %u for account %s",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_cpus,
+ assoc_ptr->grp_cpus,
+ assoc_ptr->acct);
rc = false;
break;
} else if ((job_desc->max_cpus == NO_VAL)
- || ((*limit_set_max_cpus)
+ || (acct_policy_limit_set->max_cpus
&& (job_desc->max_cpus > assoc_ptr->grp_cpus))) {
job_desc->max_cpus = assoc_ptr->grp_cpus;
- (*limit_set_max_cpus) = 1;
+ acct_policy_limit_set->max_cpus = 1;
} else if (job_desc->max_cpus > assoc_ptr->grp_cpus) {
info("job submit for user %s(%u): "
"max cpu changed %u -> %u because "
@@ -706,37 +785,52 @@
job_desc->max_cpus,
assoc_ptr->grp_cpus);
if (job_desc->max_cpus == NO_VAL)
- (*limit_set_max_cpus) = 1;
+ acct_policy_limit_set->max_cpus = 1;
job_desc->max_cpus = assoc_ptr->grp_cpus;
}
/* for validation we don't need to look at
* assoc_ptr->grp_jobs.
*/
+ if (!admin_set_memory_limit
+ && (!qos_ptr || (qos_ptr->grp_mem == INFINITE))
+ && (assoc_ptr->grp_mem != INFINITE)
+ && (job_memory > assoc_ptr->grp_mem)) {
+ debug2("job submit for user %s(%u): "
+ "min memory request %u exceeds "
+ "group max memory limit %u for account %s",
+ user_name,
+ job_desc->user_id,
+ job_memory,
+ assoc_ptr->grp_mem,
+ assoc_ptr->acct);
+ rc = false;
+ break;
+ }
- if (((*limit_set_max_nodes) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->max_nodes == ADMIN_SET_LIMIT)
|| (qos_ptr && (qos_ptr->grp_nodes != INFINITE))
|| (assoc_ptr->grp_nodes == INFINITE)
|| (update_call && (job_desc->max_nodes == NO_VAL))) {
/* no need to check/set */
} else if ((job_desc->min_nodes != NO_VAL)
&& (job_desc->min_nodes > assoc_ptr->grp_nodes)) {
- info("job submit for user %s(%u): "
- "min node request %u exceeds "
- "group max node limit %u for account %s",
- user_name,
- job_desc->user_id,
- job_desc->min_nodes,
- assoc_ptr->grp_nodes,
- assoc_ptr->acct);
+ debug2("job submit for user %s(%u): "
+ "min node request %u exceeds "
+ "group max node limit %u for account %s",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_nodes,
+ assoc_ptr->grp_nodes,
+ assoc_ptr->acct);
rc = false;
break;
} else if ((job_desc->max_nodes == 0)
- || ((*limit_set_max_nodes)
+ || (acct_policy_limit_set->max_nodes
&& (job_desc->max_nodes
> assoc_ptr->grp_nodes))) {
job_desc->max_nodes = assoc_ptr->grp_nodes;
- (*limit_set_max_nodes) = 1;
+ acct_policy_limit_set->max_nodes = 1;
} else if (job_desc->max_nodes > assoc_ptr->grp_nodes) {
info("job submit for user %s(%u): "
"max node changed %u -> %u because "
@@ -746,7 +840,7 @@
job_desc->max_nodes,
assoc_ptr->grp_nodes);
if (job_desc->max_nodes == NO_VAL)
- (*limit_set_max_nodes) = 1;
+ acct_policy_limit_set->max_nodes = 1;
job_desc->max_nodes = assoc_ptr->grp_nodes;
}
@@ -755,13 +849,13 @@
(assoc_ptr->grp_submit_jobs != INFINITE) &&
(assoc_ptr->usage->used_submit_jobs
>= assoc_ptr->grp_submit_jobs)) {
- info("job submit for user %s(%u): "
- "group max submit job limit exceeded %u "
- "for account '%s'",
- user_name,
- job_desc->user_id,
- assoc_ptr->grp_submit_jobs,
- assoc_ptr->acct);
+ debug2("job submit for user %s(%u): "
+ "group max submit job limit exceeded %u "
+ "for account '%s'",
+ user_name,
+ job_desc->user_id,
+ assoc_ptr->grp_submit_jobs,
+ assoc_ptr->acct);
rc = false;
break;
}
@@ -784,28 +878,28 @@
* assoc_ptr->max_cpu_mins_pj.
*/
- if (((*limit_set_max_cpus) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->max_cpus == ADMIN_SET_LIMIT)
|| (qos_ptr && (qos_ptr->max_cpus_pj != INFINITE))
|| (assoc_ptr->max_cpus_pj == INFINITE)
|| (update_call && (job_desc->max_cpus == NO_VAL))) {
/* no need to check/set */
} else if ((job_desc->min_cpus != NO_VAL)
&& (job_desc->min_cpus > assoc_ptr->max_cpus_pj)) {
- info("job submit for user %s(%u): "
- "min cpu limit %u exceeds "
- "account max %u",
- user_name,
- job_desc->user_id,
- job_desc->min_cpus,
- assoc_ptr->max_cpus_pj);
+ debug2("job submit for user %s(%u): "
+ "min cpu limit %u exceeds "
+ "account max %u",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_cpus,
+ assoc_ptr->max_cpus_pj);
rc = false;
break;
} else if (job_desc->max_cpus == NO_VAL
- || ((*limit_set_max_cpus)
+ || (acct_policy_limit_set->max_cpus
&& (job_desc->max_cpus
> assoc_ptr->max_cpus_pj))) {
job_desc->max_cpus = assoc_ptr->max_cpus_pj;
- (*limit_set_max_cpus) = 1;
+ acct_policy_limit_set->max_cpus = 1;
} else if (job_desc->max_cpus > assoc_ptr->max_cpus_pj) {
info("job submit for user %s(%u): "
"max cpu changed %u -> %u because "
@@ -815,7 +909,7 @@
job_desc->max_cpus,
assoc_ptr->max_cpus_pj);
if (job_desc->max_cpus == NO_VAL)
- (*limit_set_max_cpus) = 1;
+ acct_policy_limit_set->max_cpus = 1;
job_desc->max_cpus = assoc_ptr->max_cpus_pj;
}
@@ -823,29 +917,29 @@
* assoc_ptr->max_jobs.
*/
- if (((*limit_set_max_nodes) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->max_nodes == ADMIN_SET_LIMIT)
|| (qos_ptr && (qos_ptr->max_nodes_pj != INFINITE))
|| (assoc_ptr->max_nodes_pj == INFINITE)
|| (update_call && (job_desc->max_nodes == NO_VAL))) {
/* no need to check/set */
} else if ((job_desc->min_nodes != NO_VAL)
&& (job_desc->min_nodes > assoc_ptr->max_nodes_pj)) {
- info("job submit for user %s(%u): "
- "min node limit %u exceeds "
- "account max %u",
- user_name,
- job_desc->user_id,
- job_desc->min_nodes,
- assoc_ptr->max_nodes_pj);
+ debug2("job submit for user %s(%u): "
+ "min node limit %u exceeds "
+ "account max %u",
+ user_name,
+ job_desc->user_id,
+ job_desc->min_nodes,
+ assoc_ptr->max_nodes_pj);
rc = false;
break;
} else if (((job_desc->max_nodes == NO_VAL)
|| (job_desc->max_nodes == 0))
- || ((*limit_set_max_nodes)
+ || (acct_policy_limit_set->max_nodes
&& (job_desc->max_nodes
> assoc_ptr->max_nodes_pj))) {
job_desc->max_nodes = assoc_ptr->max_nodes_pj;
- (*limit_set_max_nodes) = 1;
+ acct_policy_limit_set->max_nodes = 1;
} else if (job_desc->max_nodes > assoc_ptr->max_nodes_pj) {
info("job submit for user %s(%u): "
"max node changed %u -> %u because "
@@ -855,7 +949,7 @@
job_desc->max_nodes,
assoc_ptr->max_nodes_pj);
if (job_desc->max_nodes == NO_VAL)
- (*limit_set_max_nodes) = 1;
+ acct_policy_limit_set->max_nodes = 1;
job_desc->max_nodes = assoc_ptr->max_nodes_pj;
}
@@ -864,16 +958,16 @@
(assoc_ptr->max_submit_jobs != INFINITE) &&
(assoc_ptr->usage->used_submit_jobs
>= assoc_ptr->max_submit_jobs)) {
- info("job submit for user %s(%u): "
- "account max submit job limit exceeded %u",
- user_name,
- job_desc->user_id,
- assoc_ptr->max_submit_jobs);
+ debug2("job submit for user %s(%u): "
+ "account max submit job limit exceeded %u",
+ user_name,
+ job_desc->user_id,
+ assoc_ptr->max_submit_jobs);
rc = false;
break;
}
- if (((*limit_set_time) == ADMIN_SET_LIMIT)
+ if ((acct_policy_limit_set->time == ADMIN_SET_LIMIT)
|| (qos_ptr && (qos_ptr->max_wall_pj != INFINITE))
|| (assoc_ptr->max_wall_pj == INFINITE)
|| (update_call && (job_desc->time_limit == NO_VAL))) {
@@ -887,16 +981,16 @@
job_desc->time_limit =
MIN(time_limit,
part_ptr->max_time);
- (*limit_set_time) = 1;
- } else if ((*limit_set_time) &&
+ acct_policy_limit_set->time = 1;
+ } else if (acct_policy_limit_set->time &&
job_desc->time_limit > time_limit) {
job_desc->time_limit = time_limit;
} else if (job_desc->time_limit > time_limit) {
- info("job submit for user %s(%u): "
- "time limit %u exceeds account max %u",
- user_name,
- job_desc->user_id,
- job_desc->time_limit, time_limit);
+ debug2("job submit for user %s(%u): "
+ "time limit %u exceeds account max %u",
+ user_name,
+ job_desc->user_id,
+ job_desc->time_limit, time_limit);
rc = false;
break;
}
@@ -912,6 +1006,24 @@
}
/*
+ * Determine of the specified job can execute right now or is currently
+ * blocked by an association or QOS limit. Does not re-validate job state.
+ */
+extern bool acct_policy_job_runnable_state(struct job_record *job_ptr)
+{
+ if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
+ (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
+ (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT) ||
+ (job_ptr->state_reason == WAIT_QOS_JOB_LIMIT) ||
+ (job_ptr->state_reason == WAIT_QOS_RESOURCE_LIMIT) ||
+ (job_ptr->state_reason == WAIT_QOS_TIME_LIMIT)) {
+ return false;
+ }
+
+ return true;
+}
+
+/*
* acct_policy_job_runnable - Determine of the specified job can execute
* right now or not depending upon accounting policy (e.g. running
* job limit for this association). If the association limits prevent
@@ -929,7 +1041,8 @@
bool rc = true;
uint64_t usage_mins;
uint32_t wall_mins;
- bool cancel_job = 0;
+ uint32_t job_memory = 0;
+ bool admin_set_memory_limit = false;
int parent = 0; /*flag to tell us if we are looking at the
* parent or not
*/
@@ -941,7 +1054,7 @@
return true;
if (!_valid_job_assoc(job_ptr)) {
- _cancel_job(job_ptr);
+ job_ptr->state_reason = FAIL_ACCOUNT;
return false;
}
@@ -950,20 +1063,42 @@
return true;
/* clear old state reason */
- if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
- (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
- (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT) ||
- (job_ptr->state_reason == WAIT_QOS_JOB_LIMIT) ||
- (job_ptr->state_reason == WAIT_QOS_RESOURCE_LIMIT) ||
- (job_ptr->state_reason == WAIT_QOS_TIME_LIMIT))
+ if (!acct_policy_job_runnable_state(job_ptr))
job_ptr->state_reason = WAIT_NO_REASON;
job_cpu_time_limit = (uint64_t)job_ptr->time_limit
* (uint64_t)job_ptr->details->min_cpus;
+ if (job_ptr->details->pn_min_memory) {
+ if (job_ptr->details->pn_min_memory & MEM_PER_CPU) {
+ job_memory = (job_ptr->details->pn_min_memory
+ & (~MEM_PER_CPU))
+ * job_ptr->details->min_cpus;
+ admin_set_memory_limit =
+ (job_ptr->limit_set_pn_min_memory
+ == ADMIN_SET_LIMIT)
+ || (job_ptr->limit_set_min_cpus
+ == ADMIN_SET_LIMIT);
+ debug3("acct_policy_job_runnable: job %u: MPC: "
+ "job_memory set to %u", job_ptr->job_id,
+ job_memory);
+ } else {
+ job_memory = (job_ptr->details->pn_min_memory)
+ * job_ptr->details->min_nodes;
+ admin_set_memory_limit =
+ (job_ptr->limit_set_pn_min_memory
+ == ADMIN_SET_LIMIT)
+ || (job_ptr->limit_set_min_nodes
+ == ADMIN_SET_LIMIT);
+ debug3("acct_policy_job_runnable: job %u: MPN: "
+ "job_memory set to %u", job_ptr->job_id,
+ job_memory);
+ }
+ }
+
assoc_mgr_lock(&locks);
qos_ptr = job_ptr->qos_ptr;
- if(qos_ptr) {
+ if (qos_ptr) {
slurmdb_used_limits_t *used_limits = NULL;
usage_mins = (uint64_t)(qos_ptr->usage->usage_raw / 60.0);
wall_mins = qos_ptr->usage->grp_used_wall / 60;
@@ -975,8 +1110,8 @@
*/
if ((qos_ptr->grp_cpu_mins != (uint64_t)INFINITE)
&& (usage_mins >= qos_ptr->grp_cpu_mins)) {
- job_ptr->state_reason = WAIT_QOS_JOB_LIMIT;
xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_QOS_JOB_LIMIT;
debug2("Job %u being held, "
"the job is at or exceeds QOS %s's "
"group max cpu minutes of %"PRIu64" "
@@ -997,24 +1132,25 @@
if ((job_ptr->limit_set_min_cpus != ADMIN_SET_LIMIT)
&& qos_ptr->grp_cpus != INFINITE) {
if (job_ptr->details->min_cpus > qos_ptr->grp_cpus) {
- info("job %u is being cancelled, "
- "min cpu request %u exceeds "
- "group max cpu limit %u for "
- "qos '%s'",
- job_ptr->job_id,
- job_ptr->details->min_cpus,
- qos_ptr->grp_cpus,
- qos_ptr->name);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_QOS_JOB_LIMIT;
+ debug2("job %u is being held, "
+ "min cpu request %u exceeds "
+ "group max cpu limit %u for "
+ "qos '%s'",
+ job_ptr->job_id,
+ job_ptr->details->min_cpus,
+ qos_ptr->grp_cpus,
+ qos_ptr->name);
rc = false;
goto end_it;
}
if ((qos_ptr->usage->grp_used_cpus +
job_ptr->details->min_cpus) > qos_ptr->grp_cpus) {
+ xfree(job_ptr->state_desc);
job_ptr->state_reason =
WAIT_QOS_RESOURCE_LIMIT;
- xfree(job_ptr->state_desc);
debug2("job %u being held, "
"the job is at or exceeds "
"group max cpu limit %u "
@@ -1030,10 +1166,47 @@
}
}
+ if (!admin_set_memory_limit
+ && (qos_ptr->grp_mem != INFINITE)) {
+ if (job_memory > qos_ptr->grp_mem) {
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_QOS_JOB_LIMIT;
+ info("job %u is being held, "
+ "memory request %u exceeds "
+ "group max memory limit %u for "
+ "qos '%s'",
+ job_ptr->job_id,
+ job_memory,
+ qos_ptr->grp_mem,
+ qos_ptr->name);
+ rc = false;
+ goto end_it;
+ }
+
+ if ((qos_ptr->usage->grp_used_mem +
+ job_memory) > qos_ptr->grp_mem) {
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_QOS_RESOURCE_LIMIT;
+ debug2("job %u being held, "
+ "the job is at or exceeds "
+ "group memory limit %u "
+ "with already used %u + requested %u "
+ "for qos %s",
+ job_ptr->job_id,
+ qos_ptr->grp_mem,
+ qos_ptr->usage->grp_used_mem,
+ job_memory,
+ qos_ptr->name);
+ rc = false;
+ goto end_it;
+ }
+ }
+
if ((qos_ptr->grp_jobs != INFINITE) &&
(qos_ptr->usage->grp_used_jobs >= qos_ptr->grp_jobs)) {
- job_ptr->state_reason = WAIT_QOS_JOB_LIMIT;
xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_QOS_JOB_LIMIT;
debug2("job %u being held, "
"the job is at or exceeds "
"group max jobs limit %u with %u for qos %s",
@@ -1048,9 +1221,9 @@
if (qos_ptr->grp_cpu_run_mins != INFINITE) {
if (cpu_run_mins + job_cpu_time_limit >
qos_ptr->grp_cpu_run_mins) {
- job_ptr->state_reason =
- WAIT_ASSOC_RESOURCE_LIMIT;
xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_QOS_RESOURCE_LIMIT;
debug2("job %u being held, "
"qos %s is at or exceeds "
"group max running cpu minutes "
@@ -1070,15 +1243,17 @@
if ((job_ptr->limit_set_min_nodes != ADMIN_SET_LIMIT)
&& qos_ptr->grp_nodes != INFINITE) {
if (job_ptr->details->min_nodes > qos_ptr->grp_nodes) {
- info("job %u is being cancelled, "
- "min node request %u exceeds "
- "group max node limit %u for "
- "qos '%s'",
- job_ptr->job_id,
- job_ptr->details->min_nodes,
- qos_ptr->grp_nodes,
- qos_ptr->name);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_QOS_JOB_LIMIT;
+ debug2("job %u is being held, "
+ "min node request %u exceeds "
+ "group max node limit %u for "
+ "qos '%s'",
+ job_ptr->job_id,
+ job_ptr->details->min_nodes,
+ qos_ptr->grp_nodes,
+ qos_ptr->name);
rc = false;
goto end_it;
}
@@ -1086,9 +1261,9 @@
if ((qos_ptr->usage->grp_used_nodes +
job_ptr->details->min_nodes) >
qos_ptr->grp_nodes) {
+ xfree(job_ptr->state_desc);
job_ptr->state_reason =
WAIT_QOS_RESOURCE_LIMIT;
- xfree(job_ptr->state_desc);
debug2("job %u being held, "
"the job is at or exceeds "
"group max node limit %u "
@@ -1108,8 +1283,8 @@
if ((qos_ptr->grp_wall != INFINITE)
&& (wall_mins >= qos_ptr->grp_wall)) {
- job_ptr->state_reason = WAIT_QOS_JOB_LIMIT;
xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_QOS_JOB_LIMIT;
debug2("job %u being held, "
"the job is at or exceeds "
"group wall limit %u "
@@ -1125,13 +1300,15 @@
cpu_time_limit = qos_ptr->max_cpu_mins_pj;
if ((job_ptr->time_limit != NO_VAL) &&
(job_cpu_time_limit > cpu_time_limit)) {
- info("job %u being cancelled, "
- "cpu time limit %"PRIu64" exceeds "
- "qos max per-job %"PRIu64"",
- job_ptr->job_id,
- job_cpu_time_limit,
- cpu_time_limit);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_QOS_JOB_LIMIT;
+ debug2("job %u being held, "
+ "cpu time limit %"PRIu64" exceeds "
+ "qos max per-job %"PRIu64"",
+ job_ptr->job_id,
+ job_cpu_time_limit,
+ cpu_time_limit);
rc = false;
goto end_it;
}
@@ -1141,13 +1318,15 @@
&& qos_ptr->max_cpus_pj != INFINITE) {
if (job_ptr->details->min_cpus >
qos_ptr->max_cpus_pj) {
- info("job %u being cancelled, "
- "min cpu limit %u exceeds "
- "qos per-job max %u",
- job_ptr->job_id,
- job_ptr->details->min_cpus,
- qos_ptr->max_cpus_pj);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_QOS_JOB_LIMIT;
+ debug2("job %u being held, "
+ "min cpu limit %u exceeds "
+ "qos per-job max %u",
+ job_ptr->job_id,
+ job_ptr->details->min_cpus,
+ qos_ptr->max_cpus_pj);
rc = false;
goto end_it;
}
@@ -1155,18 +1334,20 @@
if ((job_ptr->limit_set_min_cpus != ADMIN_SET_LIMIT) &&
(qos_ptr->max_cpus_pu != INFINITE)) {
- /* Cancel the job if it exceeds the per-user
+ /* Hold the job if it exceeds the per-user
* CPU limit for the given QOS
*/
- if(job_ptr->details->min_cpus >
- qos_ptr->max_cpus_pu) {
- info("job %u being cancelled, "
- "min cpu limit %u exceeds "
- "qos per-user max %u",
- job_ptr->job_id,
- job_ptr->details->min_cpus,
- qos_ptr->max_cpus_pu);
- cancel_job = 1;
+ if (job_ptr->details->min_cpus >
+ qos_ptr->max_cpus_pu) {
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_QOS_RESOURCE_LIMIT;
+ debug2("job %u being held, "
+ "min cpu limit %u exceeds "
+ "qos per-user max %u",
+ job_ptr->job_id,
+ job_ptr->details->min_cpus,
+ qos_ptr->max_cpus_pu);
rc = false;
goto end_it;
}
@@ -1177,23 +1358,26 @@
used_limits = _get_used_limits_for_user(
qos_ptr->usage->user_limit_list,
job_ptr->user_id);
- if (used_limits && (used_limits->cpus
- >= qos_ptr->max_cpus_pu)) {
+ if (used_limits && ((used_limits->cpus +
+ job_ptr->details->min_cpus)
+ > qos_ptr->max_cpus_pu)) {
+ xfree(job_ptr->state_desc);
job_ptr->state_reason =
WAIT_QOS_RESOURCE_LIMIT;
debug2("job %u being held, "
- "the job is at or exceeds "
+ "the user is at or would exceed "
"max cpus per-user limit "
- "%u with %u for QOS %s",
+ "%u with %u(+%u) for QOS %s",
job_ptr->job_id,
qos_ptr->max_cpus_pu,
- used_limits->cpus, qos_ptr->name);
+ used_limits->cpus,
+ job_ptr->details->min_cpus,
+ qos_ptr->name);
rc = false;
goto end_it;
}
}
-
if (qos_ptr->max_jobs_pu != INFINITE) {
if (!used_limits)
used_limits = _get_used_limits_for_user(
@@ -1202,6 +1386,7 @@
if (used_limits && (used_limits->jobs
>= qos_ptr->max_jobs_pu)) {
+ xfree(job_ptr->state_desc);
job_ptr->state_reason =
WAIT_QOS_RESOURCE_LIMIT;
debug2("job %u being held, "
@@ -1220,13 +1405,15 @@
&& qos_ptr->max_nodes_pj != INFINITE) {
if (job_ptr->details->min_nodes >
qos_ptr->max_nodes_pj) {
- info("job %u being cancelled, "
- "min node limit %u exceeds "
- "qos max %u",
- job_ptr->job_id,
- job_ptr->details->min_nodes,
- qos_ptr->max_nodes_pj);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_QOS_JOB_LIMIT;
+ debug2("job %u being held, "
+ "min node limit %u exceeds "
+ "qos max %u",
+ job_ptr->job_id,
+ job_ptr->details->min_nodes,
+ qos_ptr->max_nodes_pj);
rc = false;
goto end_it;
}
@@ -1239,13 +1426,15 @@
*/
if (job_ptr->details->min_nodes >
qos_ptr->max_nodes_pu) {
- info("job %u being cancelled, "
- "min node per-puser limit %u exceeds "
- "qos max %u",
- job_ptr->job_id,
- job_ptr->details->min_nodes,
- qos_ptr->max_nodes_pu);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_QOS_RESOURCE_LIMIT;
+ debug2("job %u being held, "
+ "min node per-puser limit %u exceeds "
+ "qos max %u",
+ job_ptr->job_id,
+ job_ptr->details->min_nodes,
+ qos_ptr->max_nodes_pu);
rc = false;
goto end_it;
}
@@ -1259,17 +1448,21 @@
used_limits = _get_used_limits_for_user(
qos_ptr->usage->user_limit_list,
job_ptr->user_id);
- if (used_limits && (used_limits->nodes
- >= qos_ptr->max_nodes_pu)) {
+ if (used_limits && ((used_limits->nodes
+ + job_ptr->details->min_nodes)
+ > qos_ptr->max_nodes_pu)) {
+ xfree(job_ptr->state_desc);
job_ptr->state_reason =
WAIT_QOS_RESOURCE_LIMIT;
debug2("job %u being held, "
- "the job is at or exceeds "
+ "the user is at or would exceed "
"max nodes per-user "
- "limit %u with %u for QOS %s",
+ "limit %u with %u(+%u) for QOS %s",
job_ptr->job_id,
qos_ptr->max_nodes_pu,
- used_limits->nodes, qos_ptr->name);
+ used_limits->nodes,
+ job_ptr->details->min_nodes,
+ qos_ptr->name);
rc = false;
goto end_it;
}
@@ -1284,13 +1477,15 @@
time_limit = qos_ptr->max_wall_pj;
if ((job_ptr->time_limit != NO_VAL) &&
(job_ptr->time_limit > time_limit)) {
- info("job %u being cancelled, "
- "time limit %u exceeds qos "
- "max wall pj %u",
- job_ptr->job_id,
- job_ptr->time_limit,
- time_limit);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_QOS_JOB_LIMIT;
+ debug2("job %u being held, "
+ "time limit %u exceeds qos "
+ "max wall pj %u",
+ job_ptr->job_id,
+ job_ptr->time_limit,
+ time_limit);
rc = false;
goto end_it;
}
@@ -1311,8 +1506,8 @@
(qos_ptr && qos_ptr->grp_cpu_mins == (uint64_t)INFINITE))
&& (assoc_ptr->grp_cpu_mins != (uint64_t)INFINITE)
&& (usage_mins >= assoc_ptr->grp_cpu_mins)) {
- job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
debug2("job %u being held, "
"assoc %u is at or exceeds "
"group max cpu minutes limit %"PRIu64" "
@@ -1330,15 +1525,17 @@
(qos_ptr && qos_ptr->grp_cpus == INFINITE))
&& (assoc_ptr->grp_cpus != INFINITE)) {
if (job_ptr->details->min_cpus > assoc_ptr->grp_cpus) {
- info("job %u being cancelled, "
- "min cpu request %u exceeds "
- "group max cpu limit %u for "
- "account %s",
- job_ptr->job_id,
- job_ptr->details->min_cpus,
- assoc_ptr->grp_cpus,
- assoc_ptr->acct);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_ASSOC_RESOURCE_LIMIT;
+ debug2("job %u being held, "
+ "min cpu request %u exceeds "
+ "group max cpu limit %u for "
+ "account %s",
+ job_ptr->job_id,
+ job_ptr->details->min_cpus,
+ assoc_ptr->grp_cpus,
+ assoc_ptr->acct);
rc = false;
goto end_it;
}
@@ -1346,9 +1543,9 @@
if ((assoc_ptr->usage->grp_used_cpus +
job_ptr->details->min_cpus) >
assoc_ptr->grp_cpus) {
+ xfree(job_ptr->state_desc);
job_ptr->state_reason =
WAIT_ASSOC_RESOURCE_LIMIT;
- xfree(job_ptr->state_desc);
debug2("job %u being held, "
"assoc %u is at or exceeds "
"group max cpu limit %u "
@@ -1364,12 +1561,52 @@
}
}
+ if (!admin_set_memory_limit
+ && (!qos_ptr ||
+ (qos_ptr && qos_ptr->grp_mem == INFINITE))
+ && (assoc_ptr->grp_mem != INFINITE)) {
+ if (job_memory > assoc_ptr->grp_mem) {
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_ASSOC_RESOURCE_LIMIT;
+ info("job %u being held, "
+ "memory request %u exceeds "
+ "group memory limit %u for "
+ "account %s",
+ job_ptr->job_id,
+ job_memory,
+ assoc_ptr->grp_mem,
+ assoc_ptr->acct);
+ rc = false;
+ goto end_it;
+ }
+
+ if ((assoc_ptr->usage->grp_used_mem + job_memory) >
+ assoc_ptr->grp_mem) {
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_ASSOC_RESOURCE_LIMIT;
+ debug2("job %u being held, "
+ "assoc %u is at or exceeds "
+ "group memory limit %u "
+ "with already used %u + requested %u "
+ "for account %s",
+ job_ptr->job_id, assoc_ptr->id,
+ assoc_ptr->grp_mem,
+ assoc_ptr->usage->grp_used_mem,
+ job_memory,
+ assoc_ptr->acct);
+ rc = false;
+ goto end_it;
+ }
+ }
+
if ((!qos_ptr ||
(qos_ptr && qos_ptr->grp_jobs == INFINITE)) &&
(assoc_ptr->grp_jobs != INFINITE) &&
(assoc_ptr->usage->used_jobs >= assoc_ptr->grp_jobs)) {
- job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_ASSOC_RESOURCE_LIMIT;
debug2("job %u being held, "
"assoc %u is at or exceeds "
"group max jobs limit %u with %u for account %s",
@@ -1386,9 +1623,9 @@
&& (assoc_ptr->grp_cpu_run_mins != INFINITE)) {
if (cpu_run_mins + job_cpu_time_limit >
assoc_ptr->grp_cpu_run_mins) {
+ xfree(job_ptr->state_desc);
job_ptr->state_reason =
WAIT_ASSOC_RESOURCE_LIMIT;
- xfree(job_ptr->state_desc);
debug2("job %u being held, "
"assoc %u is at or exceeds "
"group max running cpu minutes "
@@ -1411,15 +1648,17 @@
&& (assoc_ptr->grp_nodes != INFINITE)) {
if (job_ptr->details->min_nodes >
assoc_ptr->grp_nodes) {
- info("job %u being cancelled, "
- "min node request %u exceeds "
- "group max node limit %u for "
- "account %s",
- job_ptr->job_id,
- job_ptr->details->min_nodes,
- assoc_ptr->grp_nodes,
- assoc_ptr->acct);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason =
+ WAIT_ASSOC_RESOURCE_LIMIT;
+ debug2("job %u being held, "
+ "min node request %u exceeds "
+ "group max node limit %u for "
+ "account %s",
+ job_ptr->job_id,
+ job_ptr->details->min_nodes,
+ assoc_ptr->grp_nodes,
+ assoc_ptr->acct);
rc = false;
goto end_it;
}
@@ -1427,9 +1666,9 @@
if ((assoc_ptr->usage->grp_used_nodes +
job_ptr->details->min_nodes) >
assoc_ptr->grp_nodes) {
+ xfree(job_ptr->state_desc);
job_ptr->state_reason =
WAIT_ASSOC_RESOURCE_LIMIT;
- xfree(job_ptr->state_desc);
debug2("job %u being held, "
"assoc %u is at or exceeds "
"group max node limit %u "
@@ -1451,8 +1690,8 @@
(qos_ptr && qos_ptr->grp_wall == INFINITE))
&& (assoc_ptr->grp_wall != INFINITE)
&& (wall_mins >= assoc_ptr->grp_wall)) {
- job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_ASSOC_RESOURCE_LIMIT;
debug2("job %u being held, "
"assoc %u is at or exceeds "
"group wall limit %u "
@@ -1480,13 +1719,14 @@
cpu_time_limit = assoc_ptr->max_cpu_mins_pj;
if ((job_ptr->time_limit != NO_VAL) &&
(job_cpu_time_limit > cpu_time_limit)) {
- info("job %u being cancelled, "
- "cpu time limit %"PRIu64" exceeds "
- "assoc max per job %"PRIu64"",
- job_ptr->job_id,
- job_cpu_time_limit,
- cpu_time_limit);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
+ debug2("job %u being held, "
+ "cpu time limit %"PRIu64" exceeds "
+ "assoc max per job %"PRIu64"",
+ job_ptr->job_id,
+ job_cpu_time_limit,
+ cpu_time_limit);
rc = false;
goto end_it;
}
@@ -1497,13 +1737,14 @@
(assoc_ptr->max_cpus_pj != INFINITE)) {
if (job_ptr->details->min_cpus >
assoc_ptr->max_cpus_pj) {
- info("job %u being cancelled, "
- "min cpu limit %u exceeds "
- "account max %u",
- job_ptr->job_id,
- job_ptr->details->min_cpus,
- assoc_ptr->max_cpus_pj);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
+ debug2("job %u being held, "
+ "min cpu limit %u exceeds "
+ "account max %u",
+ job_ptr->job_id,
+ job_ptr->details->min_cpus,
+ assoc_ptr->max_cpus_pj);
rc = false;
goto end_it;
}
@@ -1513,8 +1754,8 @@
(qos_ptr && qos_ptr->max_jobs_pu == INFINITE)) &&
(assoc_ptr->max_jobs != INFINITE) &&
(assoc_ptr->usage->used_jobs >= assoc_ptr->max_jobs)) {
- job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
debug2("job %u being held, "
"assoc %u is at or exceeds "
"max jobs limit %u with %u for account %s",
@@ -1530,13 +1771,14 @@
&& (assoc_ptr->max_nodes_pj != INFINITE)) {
if (job_ptr->details->min_nodes >
assoc_ptr->max_nodes_pj) {
- info("job %u being cancelled, "
- "min node limit %u exceeds "
- "account max %u",
- job_ptr->job_id,
- job_ptr->details->min_nodes,
- assoc_ptr->max_nodes_pj);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
+ debug2("job %u being held, "
+ "min node limit %u exceeds "
+ "account max %u",
+ job_ptr->job_id,
+ job_ptr->details->min_nodes,
+ assoc_ptr->max_nodes_pj);
rc = false;
goto end_it;
}
@@ -1553,13 +1795,13 @@
time_limit = assoc_ptr->max_wall_pj;
if ((job_ptr->time_limit != NO_VAL) &&
(job_ptr->time_limit > time_limit)) {
- info("job %u being cancelled, "
- "time limit %u exceeds account "
- "max %u",
- job_ptr->job_id,
- job_ptr->time_limit,
- time_limit);
- cancel_job = 1;
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
+ debug2("job %u being held, "
+ "time limit %u exceeds account max %u",
+ job_ptr->job_id,
+ job_ptr->time_limit,
+ time_limit);
rc = false;
goto end_it;
}
@@ -1571,25 +1813,19 @@
end_it:
assoc_mgr_unlock(&locks);
- if(cancel_job)
- _cancel_job(job_ptr);
-
return rc;
}
/*
- * acct_policy_update_pending_job - Make sure the limits imposed on a
- * job on submission are correct after an update to a qos or
- * association. If the association/qos limits prevent
- * the job from ever running (lowered limits since job submission),
- * then cancel the job.
+ * acct_policy_update_pending_job - Make sure the limits imposed on a job on
+ * submission are correct after an update to a qos or association. If
+ * the association/qos limits prevent the job from running (lowered
+ * limits since job submission), then reset its reason field.
*/
extern int acct_policy_update_pending_job(struct job_record *job_ptr)
{
job_desc_msg_t job_desc;
- uint16_t limit_set_max_cpus = 0;
- uint16_t limit_set_max_nodes = 0;
- uint16_t limit_set_time = 0;
+ acct_policy_limit_set_t acct_policy_limit_set;
bool update_accounting = false;
struct job_details *details_ptr;
int rc = SLURM_SUCCESS;
@@ -1612,10 +1848,12 @@
*/
slurm_init_job_desc_msg(&job_desc);
+ memset(&acct_policy_limit_set, 0, sizeof(acct_policy_limit_set_t));
+
job_desc.min_cpus = details_ptr->min_cpus;
/* Only set this value if not set from a limit */
if (job_ptr->limit_set_max_cpus == ADMIN_SET_LIMIT)
- limit_set_max_cpus = job_ptr->limit_set_max_cpus;
+ acct_policy_limit_set.max_cpus = job_ptr->limit_set_max_cpus;
else if ((details_ptr->max_cpus != NO_VAL)
&& !job_ptr->limit_set_max_cpus)
job_desc.max_cpus = details_ptr->max_cpus;
@@ -1623,66 +1861,77 @@
job_desc.min_nodes = details_ptr->min_nodes;
/* Only set this value if not set from a limit */
if (job_ptr->limit_set_max_nodes == ADMIN_SET_LIMIT)
- limit_set_max_nodes = job_ptr->limit_set_max_nodes;
+ acct_policy_limit_set.max_nodes = job_ptr->limit_set_max_nodes;
else if ((details_ptr->max_nodes != NO_VAL)
&& !job_ptr->limit_set_max_nodes)
job_desc.max_nodes = details_ptr->max_nodes;
else
job_desc.max_nodes = 0;
+ job_desc.pn_min_memory = details_ptr->pn_min_memory;
+ /* Only set this value if not set from a limit */
+ if (job_ptr->limit_set_pn_min_memory == ADMIN_SET_LIMIT)
+ acct_policy_limit_set.pn_min_memory =
+ job_ptr->limit_set_pn_min_memory;
+ else if ((details_ptr->pn_min_memory != NO_VAL)
+ && !job_ptr->limit_set_pn_min_memory)
+ job_desc.pn_min_memory = details_ptr->pn_min_memory;
+ else
+ job_desc.pn_min_memory = 0;
+
/* Only set this value if not set from a limit */
if (job_ptr->limit_set_time == ADMIN_SET_LIMIT)
- limit_set_time = job_ptr->limit_set_time;
+ acct_policy_limit_set.time = job_ptr->limit_set_time;
else if ((job_ptr->time_limit != NO_VAL) && !job_ptr->limit_set_time)
job_desc.time_limit = job_ptr->time_limit;
if (!acct_policy_validate(&job_desc, job_ptr->part_ptr,
job_ptr->assoc_ptr, job_ptr->qos_ptr,
- &limit_set_max_cpus,
- &limit_set_max_nodes,
- &limit_set_time, 0)) {
+ &job_ptr->state_reason,
+ &acct_policy_limit_set, 0)) {
info("acct_policy_update_pending_job: exceeded "
- "association/qos's cpu, node or "
+ "association/qos's cpu, node, memory or "
"time limit for job %d", job_ptr->job_id);
- _cancel_job(job_ptr);
return SLURM_ERROR;
}
/* If it isn't an admin set limit replace it. */
- if (!limit_set_max_cpus && (job_ptr->limit_set_max_cpus == 1)) {
+ if (!acct_policy_limit_set.max_cpus
+ && (job_ptr->limit_set_max_cpus == 1)) {
details_ptr->max_cpus = NO_VAL;
job_ptr->limit_set_max_cpus = 0;
update_accounting = true;
- } else if (limit_set_max_cpus != ADMIN_SET_LIMIT) {
+ } else if (acct_policy_limit_set.max_cpus != ADMIN_SET_LIMIT) {
if (details_ptr->max_cpus != job_desc.max_cpus) {
details_ptr->max_cpus = job_desc.max_cpus;
update_accounting = true;
}
- job_ptr->limit_set_max_cpus = limit_set_max_cpus;
+ job_ptr->limit_set_max_cpus = acct_policy_limit_set.max_cpus;
}
- if (!limit_set_max_nodes && (job_ptr->limit_set_max_nodes == 1)) {
+ if (!acct_policy_limit_set.max_nodes
+ && (job_ptr->limit_set_max_nodes == 1)) {
details_ptr->max_nodes = 0;
job_ptr->limit_set_max_nodes = 0;
update_accounting = true;
- } else if (limit_set_max_nodes != ADMIN_SET_LIMIT) {
+ } else if (acct_policy_limit_set.max_nodes != ADMIN_SET_LIMIT) {
if (details_ptr->max_nodes != job_desc.max_nodes) {
details_ptr->max_nodes = job_desc.max_nodes;
update_accounting = true;
}
- job_ptr->limit_set_max_nodes = limit_set_max_nodes;
+ job_ptr->limit_set_max_nodes = acct_policy_limit_set.max_nodes;
}
- if (!limit_set_time && (job_ptr->limit_set_time == 1)) {
+ if (!acct_policy_limit_set.time && (job_ptr->limit_set_time == 1)) {
job_ptr->time_limit = NO_VAL;
job_ptr->limit_set_time = 0;
update_accounting = true;
- } else if (limit_set_time != ADMIN_SET_LIMIT) {
+ } else if (acct_policy_limit_set.time != ADMIN_SET_LIMIT) {
if (job_ptr->time_limit != job_desc.time_limit) {
job_ptr->time_limit = job_desc.time_limit;
update_accounting = true;
}
- job_ptr->limit_set_time = limit_set_time;
+ job_ptr->limit_set_time = acct_policy_limit_set.time;
}
if (update_accounting) {
@@ -1697,131 +1946,3 @@
return rc;
}
-
-extern bool acct_policy_node_usable(struct job_record *job_ptr,
- uint32_t used_cpus,
- char *node_name, uint32_t node_cpus)
-{
- slurmdb_qos_rec_t *qos_ptr;
- slurmdb_association_rec_t *assoc_ptr;
- bool rc = true;
- uint32_t total_cpus = used_cpus + node_cpus;
- bool cancel_job = 0;
- int parent = 0; /* flag to tell us if we are looking at the
- * parent or not
- */
- assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK,
- READ_LOCK, NO_LOCK, NO_LOCK };
-
- /* check to see if we are enforcing associations */
- if (!accounting_enforce)
- return true;
-
- if (!_valid_job_assoc(job_ptr)) {
- _cancel_job(job_ptr);
- return false;
- }
-
- /* now see if we are enforcing limits */
- if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
- return true;
-
- /* clear old state reason */
- if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
- (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
- (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT) ||
- (job_ptr->state_reason == WAIT_QOS_JOB_LIMIT) ||
- (job_ptr->state_reason == WAIT_QOS_RESOURCE_LIMIT) ||
- (job_ptr->state_reason == WAIT_QOS_TIME_LIMIT))
- job_ptr->state_reason = WAIT_NO_REASON;
-
-
- assoc_mgr_lock(&locks);
- qos_ptr = job_ptr->qos_ptr;
- if(qos_ptr) {
- if (qos_ptr->grp_cpus != INFINITE) {
- if ((total_cpus+qos_ptr->usage->grp_used_cpus)
- > qos_ptr->grp_cpus) {
- debug("Can't use %s, adding it's %u cpus "
- "exceeds "
- "group max cpu limit %u for qos '%s'",
- node_name,
- node_cpus,
- qos_ptr->grp_cpus,
- qos_ptr->name);
- rc = false;
- goto end_it;
- }
- }
-
- if (qos_ptr->max_cpus_pj != INFINITE) {
- if (total_cpus > qos_ptr->max_cpus_pj) {
- debug("Can't use %s, adding it's %u cpus "
- "exceeds "
- "max cpu limit %u for qos '%s'",
- node_name,
- node_cpus,
- qos_ptr->max_cpus_pj,
- qos_ptr->name);
- cancel_job = 1;
- rc = false;
- goto end_it;
- }
- }
- }
-
- assoc_ptr = job_ptr->assoc_ptr;
- while(assoc_ptr) {
- if ((!qos_ptr ||
- (qos_ptr && qos_ptr->grp_cpus == INFINITE))
- && (assoc_ptr->grp_cpus != INFINITE)) {
- if ((total_cpus+assoc_ptr->usage->grp_used_cpus)
- > assoc_ptr->grp_cpus) {
- debug("Can't use %s, adding it's %u cpus "
- "exceeds "
- "group max cpu limit %u for account '%s'",
- node_name,
- node_cpus,
- assoc_ptr->grp_cpus,
- assoc_ptr->acct);
- rc = false;
- goto end_it;
- }
- }
-
- /* We don't need to look at the regular limits for
- * parents since we have pre-propogated them, so just
- * continue with the next parent
- */
- if(parent) {
- assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
- continue;
- }
-
- if ((!qos_ptr ||
- (qos_ptr && qos_ptr->max_cpus_pj == INFINITE)) &&
- (assoc_ptr->max_cpus_pj != INFINITE)) {
- if (job_ptr->details->min_cpus >
- assoc_ptr->max_cpus_pj) {
- debug("Can't use %s, adding it's %u cpus "
- "exceeds "
- "max cpu limit %u for account '%s'",
- node_name,
- node_cpus,
- assoc_ptr->max_cpus_pj,
- assoc_ptr->acct);
- rc = false;
- goto end_it;
- }
- }
- assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
- parent = 1;
- }
-end_it:
- assoc_mgr_unlock(&locks);
-
- if(cancel_job)
- _cancel_job(job_ptr);
-
- return rc;
-}
diff --git a/src/slurmctld/acct_policy.h b/src/slurmctld/acct_policy.h
index e339d0f..bac0898 100644
--- a/src/slurmctld/acct_policy.h
+++ b/src/slurmctld/acct_policy.h
@@ -41,6 +41,16 @@
#define ADMIN_SET_LIMIT 0xffff
+typedef struct {
+ uint16_t max_cpus;
+ uint16_t max_nodes;
+ uint16_t min_cpus;
+ uint16_t min_nodes;
+ uint16_t pn_min_memory;
+ uint16_t qos;
+ uint16_t time;
+} acct_policy_limit_set_t;
+
/*
* acct_policy_add_job_submit - Note that a job has been submitted for
* accounting policy purposes.
@@ -70,9 +80,10 @@
struct part_record *part_ptr,
slurmdb_association_rec_t *assoc_in,
slurmdb_qos_rec_t *qos_ptr,
- uint16_t *limit_set_max_cpus,
- uint16_t *limit_set_max_nodes,
- uint16_t *limit_set_time, bool update_call);
+ uint16_t *state_reason,
+ acct_policy_limit_set_t *acct_policy_limit_set,
+ bool update_call);
+
/*
* acct_policy_job_runnable - Determine of the specified job can execute
* right now or not depending upon accounting policy (e.g. running
@@ -83,6 +94,12 @@
extern bool acct_policy_job_runnable(struct job_record *job_ptr);
/*
+ * Determine of the specified job can execute right now or is currently
+ * blocked by an association or QOS limit. Does not re-validate job state.
+ */
+extern bool acct_policy_job_runnable_state(struct job_record *job_ptr);
+
+/*
* acct_policy_update_pending_job - Make sure the limits imposed on a
* job on submission are correct after an update to a qos or
* association. If the association/qos limits prevent
@@ -91,9 +108,4 @@
*/
extern int acct_policy_update_pending_job(struct job_record *job_ptr);
-extern bool acct_policy_node_usable(struct job_record *job_ptr,
- uint32_t used_cpus,
- char *node_name, uint32_t node_cpus);
-
-
#endif /* !_HAVE_ACCT_POLICY_H */
diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 647e3f9..c7d55f8 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -397,14 +397,16 @@
agent_info_ptr->msg_args_pptr = &agent_arg_ptr->msg_args;
if ((agent_arg_ptr->msg_type != REQUEST_JOB_NOTIFY) &&
- (agent_arg_ptr->msg_type != REQUEST_SHUTDOWN) &&
+ (agent_arg_ptr->msg_type != REQUEST_REBOOT_NODES) &&
(agent_arg_ptr->msg_type != REQUEST_RECONFIGURE) &&
+ (agent_arg_ptr->msg_type != REQUEST_SHUTDOWN) &&
(agent_arg_ptr->msg_type != SRUN_EXEC) &&
(agent_arg_ptr->msg_type != SRUN_TIMEOUT) &&
(agent_arg_ptr->msg_type != SRUN_NODE_FAIL) &&
(agent_arg_ptr->msg_type != SRUN_REQUEST_SUSPEND) &&
(agent_arg_ptr->msg_type != SRUN_USER_MSG) &&
(agent_arg_ptr->msg_type != SRUN_STEP_MISSING) &&
+ (agent_arg_ptr->msg_type != SRUN_STEP_SIGNAL) &&
(agent_arg_ptr->msg_type != SRUN_JOB_COMPLETE)) {
#ifdef HAVE_FRONT_END
span = set_span(agent_arg_ptr->node_count,
@@ -524,13 +526,14 @@
int i;
agent_info_t *agent_ptr = (agent_info_t *) args;
thd_t *thread_ptr = agent_ptr->thread_struct;
- unsigned long usec = 125000;
+ unsigned long usec = 5000;
ListIterator itr;
thd_complete_t thd_comp;
ret_data_info_t *ret_data_info = NULL;
if ( (agent_ptr->msg_type == SRUN_JOB_COMPLETE) ||
(agent_ptr->msg_type == SRUN_STEP_MISSING) ||
+ (agent_ptr->msg_type == SRUN_STEP_SIGNAL) ||
(agent_ptr->msg_type == SRUN_EXEC) ||
(agent_ptr->msg_type == SRUN_NODE_FAIL) ||
(agent_ptr->msg_type == SRUN_PING) ||
@@ -554,7 +557,7 @@
slurm_mutex_lock(&agent_ptr->thread_mutex);
for (i = 0; i < agent_ptr->thread_count; i++) {
//info("thread name %s",thread_ptr[i].node_name);
- if(!thread_ptr[i].ret_list) {
+ if (!thread_ptr[i].ret_list) {
_update_wdog_state(&thread_ptr[i],
&thread_ptr[i].state,
&thd_comp);
@@ -619,6 +622,7 @@
step_id = NO_VAL;
} else if ((agent_ptr->msg_type == SRUN_JOB_COMPLETE) ||
(agent_ptr->msg_type == SRUN_STEP_MISSING) ||
+ (agent_ptr->msg_type == SRUN_STEP_SIGNAL) ||
(agent_ptr->msg_type == SRUN_EXEC) ||
(agent_ptr->msg_type == SRUN_USER_MSG)) {
return; /* no need to note srun response */
@@ -819,6 +823,7 @@
(msg_type == SRUN_EXEC) ||
(msg_type == SRUN_JOB_COMPLETE) ||
(msg_type == SRUN_STEP_MISSING) ||
+ (msg_type == SRUN_STEP_SIGNAL) ||
(msg_type == SRUN_TIMEOUT) ||
(msg_type == SRUN_USER_MSG) ||
(msg_type == RESPONSE_RESOURCE_ALLOCATION) ||
@@ -1127,7 +1132,6 @@
xfree(queued_req_ptr);
}
-
/*
* agent_retry - Agent for retrying pending RPCs. One pending request is
* issued if it has been pending for at least min_wait seconds
@@ -1376,8 +1380,9 @@
RESPONSE_RESOURCE_ALLOCATION)
slurm_free_resource_allocation_response_msg(
agent_arg_ptr->msg_args);
- else if ((agent_arg_ptr->msg_type == REQUEST_ABORT_JOB) ||
- (agent_arg_ptr->msg_type == REQUEST_TERMINATE_JOB) ||
+ else if ((agent_arg_ptr->msg_type == REQUEST_ABORT_JOB) ||
+ (agent_arg_ptr->msg_type == REQUEST_TERMINATE_JOB) ||
+ (agent_arg_ptr->msg_type == REQUEST_KILL_PREEMPTED) ||
(agent_arg_ptr->msg_type == REQUEST_KILL_TIMELIMIT))
slurm_free_kill_job_msg(agent_arg_ptr->msg_args);
else if (agent_arg_ptr->msg_type == SRUN_USER_MSG)
@@ -1389,6 +1394,9 @@
else if (agent_arg_ptr->msg_type == SRUN_STEP_MISSING)
slurm_free_srun_step_missing_msg(
agent_arg_ptr->msg_args);
+ else if (agent_arg_ptr->msg_type == SRUN_STEP_SIGNAL)
+ slurm_free_job_step_kill_msg(
+ agent_arg_ptr->msg_args);
else if (agent_arg_ptr->msg_type == REQUEST_JOB_NOTIFY)
slurm_free_job_notify_msg(agent_arg_ptr->msg_args);
else
@@ -1529,7 +1537,7 @@
batch_job_launch_msg_t *launch_msg_ptr;
time_t now = time(NULL);
struct job_record *job_ptr;
- int delay_time, nodes_ready = 0;
+ int delay_time, nodes_ready = 0, tmp;
agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
if (agent_arg_ptr->msg_type != REQUEST_BATCH_JOB_LAUNCH)
@@ -1551,7 +1559,21 @@
}
if (job_ptr->wait_all_nodes) {
- (void) job_node_ready(launch_msg_ptr->job_id, &nodes_ready);
+ (void) job_node_ready(launch_msg_ptr->job_id, &tmp);
+ if (tmp == (READY_JOB_STATE | READY_NODE_STATE)) {
+ nodes_ready = 1;
+ if (launch_msg_ptr->alias_list &&
+ !strcmp(launch_msg_ptr->alias_list, "TBD")) {
+ /* Update launch RPC with correct node
+ * aliases */
+ struct job_record *job_ptr;
+ job_ptr = find_job_record(launch_msg_ptr->
+ job_id);
+ xfree(launch_msg_ptr->alias_list);
+ launch_msg_ptr->alias_list = xstrdup(job_ptr->
+ alias_list);
+ }
+ }
} else {
#ifdef HAVE_FRONT_END
nodes_ready = 1;
@@ -1612,3 +1634,11 @@
queued_req_ptr->last_attempt = now;
return 1;
}
+
+/* Return length of agent's retry_list */
+extern int retry_list_size(void)
+{
+ if (retry_list == NULL)
+ return 0;
+ return list_count(retry_list);
+}
diff --git a/src/slurmctld/agent.h b/src/slurmctld/agent.h
index d663196..5547732 100644
--- a/src/slurmctld/agent.h
+++ b/src/slurmctld/agent.h
@@ -105,4 +105,7 @@
*/
extern void mail_job_info (struct job_record *job_ptr, uint16_t mail_type);
+/* Return length of agent's retry_list */
+extern int retry_list_size(void);
+
#endif /* !_AGENT_H */
diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c
index aa1a86c..2456e76 100644
--- a/src/slurmctld/backup.c
+++ b/src/slurmctld/backup.c
@@ -58,6 +58,7 @@
#include "src/common/daemonize.h"
#include "src/common/log.h"
#include "src/common/macros.h"
+#include "src/common/node_select.h"
#include "src/common/slurm_auth.h"
#include "src/common/switch.h"
#include "src/common/xsignal.h"
@@ -224,6 +225,7 @@
}
slurmctld_config.shutdown_time = (time_t) 0;
unlock_slurmctld(config_write_lock);
+ select_g_select_nodeinfo_set_all(time(NULL));
return;
}
@@ -370,9 +372,6 @@
slurm_free_msg(msg);
- /* close should only be called when the socket
- * implementation is being used the following call will
- * be a no-op in a message/mongo implementation */
slurm_close_accepted_conn(newsockfd); /* close new socket */
}
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 2bb9c79..a0db10d 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -57,6 +57,7 @@
#include <string.h>
#include <sys/resource.h>
#include <sys/stat.h>
+#include <unistd.h>
#include "slurm/slurm_errno.h"
@@ -161,6 +162,10 @@
bool ping_nodes_now = false;
uint32_t cluster_cpus = 0;
int with_slurmdbd = 0;
+bool want_nodes_reboot = true;
+
+/* Next used for stats/diagnostics */
+diag_stats_t slurmctld_diag_stats;
/* Local variables */
static int daemonize = DEFAULT_DAEMONIZE;
@@ -202,6 +207,7 @@
static void _update_qos(slurmdb_qos_rec_t *rec);
inline static int _report_locks_set(void);
static void * _service_connection(void *arg);
+static void _set_work_dir(void);
static int _shutdown_backup_controller(int wait_time);
static void * _slurmctld_background(void *no_data);
static void * _slurmctld_rpc_mgr(void *no_data);
@@ -217,6 +223,9 @@
int newsockfd;
} connection_arg_t;
+time_t last_proc_req_start = 0;
+time_t next_stats_reset = 0;
+
/* main - slurmctld main function, start various threads and process RPCs */
int main(int argc, char *argv[])
{
@@ -254,25 +263,6 @@
sched_log_alter(sched_log_opts, LOG_DAEMON,
slurmctld_conf.sched_logfile);
debug("sched: slurmctld starting");
-
- if (slurmctld_conf.slurmctld_logfile &&
- (slurmctld_conf.slurmctld_logfile[0] == '/')) {
- char *slash_ptr, *work_dir;
- work_dir = xstrdup(slurmctld_conf.slurmctld_logfile);
- slash_ptr = strrchr(work_dir, '/');
- if (slash_ptr == work_dir)
- work_dir[1] = '\0';
- else
- slash_ptr[0] = '\0';
- if (chdir(work_dir) < 0)
- fatal("chdir(%s): %m", work_dir);
- xfree(work_dir);
- } else {
- if (chdir(slurmctld_conf.state_save_location) < 0) {
- fatal("chdir(%s): %m",
- slurmctld_conf.state_save_location);
- }
- }
} else {
slurmctld_config.daemonize = 0;
}
@@ -287,6 +277,8 @@
*/
_init_pidfile();
_become_slurm_user();
+ if (daemonize)
+ _set_work_dir();
if (stat(slurmctld_conf.mail_prog, &stat_buf) != 0)
error("Configured MailProg is invalid");
@@ -397,6 +389,7 @@
way. If we get an error we can't do anything about it.
*/
load_assoc_usage(slurmctld_conf.state_save_location);
+ load_qos_usage(slurmctld_conf.state_save_location);
/* This thread is looking for when we get correct data from
the database so we can update the assoc_ptr's in the jobs
@@ -686,6 +679,15 @@
break;
sleep(1);
}
+#ifdef HAVE_BG
+ /* Always call slurm_select_fini() on some systems like
+ BlueGene we need to make sure other processes are ended
+ or we could get a random core from within it's
+ underlying infrastructure.
+ */
+ slurm_select_fini();
+#endif
+
#endif
xfree(slurmctld_cluster_name);
@@ -967,8 +969,9 @@
no_thread = 0;
if (no_thread) {
- _service_connection((void *) conn_arg);
- }
+ slurmctld_diag_stats.proc_req_raw++;
+ _service_connection((void *) conn_arg);
+ }
}
debug3("_slurmctld_rpc_mgr shutting down");
@@ -998,17 +1001,14 @@
* slurm_receive_msg sets msg connection fd to accepted fd. This allows
* possibility for slurmctld_req() to close accepted connection.
*/
- if(slurm_receive_msg(conn->newsockfd, msg, 0) != 0) {
+ if (slurm_receive_msg(conn->newsockfd, msg, 0) != 0) {
error("slurm_receive_msg: %m");
- /* close should only be called when the socket implementation
- * is being used the following call will be a no-op in a
- * message/mongo implementation */
/* close the new socket */
slurm_close_accepted_conn(conn->newsockfd);
goto cleanup;
}
- if(errno != SLURM_SUCCESS) {
+ if (errno != SLURM_SUCCESS) {
if (errno == SLURM_PROTOCOL_VERSION_ERROR) {
slurm_send_rc_msg(msg, SLURM_PROTOCOL_VERSION_ERROR);
} else
@@ -1182,10 +1182,10 @@
{
int cnt = 0;
- cnt = job_cancel_by_assoc_id(rec->id);
+ cnt = job_hold_by_assoc_id(rec->id);
if (cnt) {
- info("Removed association id:%u user:%s, cancelled %u jobs",
+ info("Removed association id:%u user:%s, held %u jobs",
rec->id, rec->user, cnt);
} else
debug("Removed association id:%u user:%s", rec->id, rec->user);
@@ -1195,10 +1195,10 @@
{
int cnt = 0;
- cnt = job_cancel_by_qos_id(rec->id);
+ cnt = job_hold_by_qos_id(rec->id);
if (cnt) {
- info("Removed QOS:%s cancelled %u jobs", rec->name, cnt);
+ info("Removed QOS:%s held %u jobs", rec->name, cnt);
} else
debug("Removed QOS:%s", rec->name);
}
@@ -1251,6 +1251,57 @@
unlock_slurmctld(job_write_lock);
}
+static void _queue_reboot_msg(void)
+{
+ agent_arg_t *reboot_agent_args = NULL;
+ struct node_record *node_ptr;
+ char *host_str;
+ time_t now = time(NULL);
+ int i;
+ bool want_reboot;
+
+ want_nodes_reboot = false;
+ for (i = 0, node_ptr = node_record_table_ptr;
+ i < node_record_count; i++, node_ptr++) {
+ if (!IS_NODE_MAINT(node_ptr) || /* do it only if node */
+ is_node_in_maint_reservation(i)) /*isn't in reservation */
+ continue;
+ want_nodes_reboot = true; /* mark it for the next cycle */
+ if (IS_NODE_IDLE(node_ptr) && !IS_NODE_NO_RESPOND(node_ptr) &&
+ !IS_NODE_POWER_UP(node_ptr)) /* only active idle nodes */
+ want_reboot = true;
+ else if (IS_NODE_FUTURE(node_ptr) &&
+ (node_ptr->last_response == (time_t) 0))
+ want_reboot = true; /* system just restarted */
+ else
+ want_reboot = false;
+ if (!want_reboot)
+ continue;
+ if (reboot_agent_args == NULL) {
+ reboot_agent_args = xmalloc(sizeof(agent_arg_t));
+ reboot_agent_args->msg_type = REQUEST_REBOOT_NODES;
+ reboot_agent_args->retry = 0;
+ reboot_agent_args->hostlist = hostlist_create("");
+ }
+ hostlist_push(reboot_agent_args->hostlist, node_ptr->name);
+ reboot_agent_args->node_count++;
+ node_ptr->node_state = NODE_STATE_FUTURE |
+ (node_ptr->node_state & NODE_STATE_FLAGS);
+ bit_clear(avail_node_bitmap, i);
+ bit_clear(idle_node_bitmap, i);
+ node_ptr->last_response = now;
+ }
+ if (reboot_agent_args != NULL) {
+ hostlist_uniq(reboot_agent_args->hostlist);
+ host_str = hostlist_ranged_string_xmalloc(
+ reboot_agent_args->hostlist);
+ debug("Queuing reboot request for nodes %s", host_str);
+ xfree(host_str);
+ agent_queue_request(reboot_agent_args);
+ last_node_update = now;
+ }
+}
+
/*
* _slurmctld_background - process slurmctld background activities
* purge defunct job records, save state, schedule jobs, and
@@ -1273,6 +1324,7 @@
static time_t last_node_acct;
static time_t last_ctld_bu_ping;
static time_t last_uid_update;
+ static time_t last_reboot_msg_time;
static bool ping_msg_sent = false;
time_t now;
int no_resp_msg_interval, ping_interval, purge_job_interval;
@@ -1308,7 +1360,7 @@
last_purge_job_time = last_trigger = last_health_check_time = now;
last_timelimit_time = last_assert_primary_time = now;
last_no_resp_msg_time = last_resv_time = last_ctld_bu_ping = now;
- last_uid_update = now;
+ last_uid_update = last_reboot_msg_time = now;
if ((slurmctld_conf.min_job_age > 0) &&
(slurmctld_conf.min_job_age < PURGE_JOB_INTERVAL)) {
@@ -1371,7 +1423,7 @@
now = time(NULL);
last_resv_time = now;
lock_slurmctld(node_write_lock);
- set_node_maint_mode();
+ set_node_maint_mode(false);
unlock_slurmctld(node_write_lock);
}
@@ -1436,6 +1488,14 @@
unlock_slurmctld(job_read_lock);
}
+ if (want_nodes_reboot && (now > last_reboot_msg_time)) {
+ now = time(NULL);
+ last_reboot_msg_time = now;
+ lock_slurmctld(node_write_lock);
+ _queue_reboot_msg();
+ unlock_slurmctld(node_write_lock);
+ }
+
/* Process any pending agent work */
agent_retry(RPC_RETRY_INTERVAL, true);
@@ -1501,6 +1561,24 @@
last_node_acct = now;
_accounting_cluster_ready();
}
+
+
+ if (last_proc_req_start == 0) {
+ /* Stats will reset at midnight (approx).
+ * Uhmmm... UTC time?... It is not so important.
+ * Just resetting during the night */
+ last_proc_req_start = now;
+ next_stats_reset = last_proc_req_start -
+ (last_proc_req_start % 86400) +
+ 86400;
+ }
+
+ if ((next_stats_reset > 0) && (now > next_stats_reset)) {
+ /* Resetting stats values */
+ last_proc_req_start = now;
+ next_stats_reset = now - (now % 86400) + 86400;
+ reset_stats(0);
+ }
/* Reassert this machine as the primary controller.
* A network or security problem could result in
@@ -1825,6 +1903,10 @@
* NOTE: READ lock_slurmctld config before entry */
void update_logging(void)
{
+ int rc;
+ uid_t slurm_user_id = slurmctld_conf.slurm_user_id;
+ gid_t slurm_user_gid = gid_from_uid(slurm_user_id);
+
/* Preserve execute line arguments (if any) */
if (debug_level) {
slurmctld_conf.slurmctld_debug = MIN(
@@ -1859,6 +1941,25 @@
sched_log_alter(sched_log_opts, LOG_DAEMON,
slurmctld_conf.sched_logfile);
+
+ if (slurmctld_conf.slurmctld_logfile) {
+ rc = chown(slurmctld_conf.slurmctld_logfile,
+ slurm_user_id, slurm_user_gid);
+ if (rc) {
+ error("chown(%s, %d, %d): %m",
+ slurmctld_conf.slurmctld_logfile,
+ (int) slurm_user_id, (int) slurm_user_gid);
+ }
+ }
+ if (slurmctld_conf.sched_logfile) {
+ rc = chown(slurmctld_conf.sched_logfile,
+ slurm_user_id, slurm_user_gid);
+ if (rc) {
+ error("chown(%s, %d, %d): %m",
+ slurmctld_conf.sched_logfile,
+ (int) slurm_user_id, (int) slurm_user_gid);
+ }
+ }
}
/* Reset slurmd nice value */
@@ -1972,7 +2073,7 @@
list_count(job_list));
itr = list_iterator_create(job_list);
while ((job_ptr = list_next(itr))) {
- if(job_ptr->assoc_id) {
+ if (job_ptr->assoc_id) {
memset(&assoc_rec, 0,
sizeof(slurmdb_association_rec_t));
assoc_rec.id = job_ptr->assoc_id;
@@ -2146,3 +2247,41 @@
return rc;
}
+
+static void _set_work_dir(void)
+{
+ bool success = false;
+
+ if (slurmctld_conf.slurmctld_logfile &&
+ (slurmctld_conf.slurmctld_logfile[0] == '/')) {
+ char *slash_ptr, *work_dir;
+ work_dir = xstrdup(slurmctld_conf.slurmctld_logfile);
+ slash_ptr = strrchr(work_dir, '/');
+ if (slash_ptr == work_dir)
+ work_dir[1] = '\0';
+ else
+ slash_ptr[0] = '\0';
+ if ((access(work_dir, W_OK) != 0) || (chdir(work_dir) < 0))
+ error("chdir(%s): %m", work_dir);
+ else
+ success = true;
+ xfree(work_dir);
+ }
+
+ if (!success) {
+ if ((access(slurmctld_conf.state_save_location, W_OK) != 0) ||
+ (chdir(slurmctld_conf.state_save_location) < 0)) {
+ error("chdir(%s): %m",
+ slurmctld_conf.state_save_location);
+ } else
+ success = true;
+ }
+
+ if (!success) {
+ if ((access("/var/tmp", W_OK) != 0) ||
+ (chdir("/var/tmp") < 0)) {
+ error("chdir(/var/tmp): %m");
+ } else
+ info("chdir to /var/tmp");
+ }
+}
diff --git a/src/slurmctld/front_end.c b/src/slurmctld/front_end.c
index df55834..91b47d8 100644
--- a/src/slurmctld/front_end.c
+++ b/src/slurmctld/front_end.c
@@ -145,9 +145,10 @@
/*
* assign_front_end - assign a front end node for starting a job
- * RET pointer to the front end node to use or NULL if none available
+ * IN batch_host - previously set batch_host name
+ * RET pointer to the front end node to use or NULL if none found
*/
-extern front_end_record_t *assign_front_end(void)
+extern front_end_record_t *assign_front_end(char *batch_host)
{
#ifdef HAVE_FRONT_END
static int last_assigned = -1;
@@ -158,10 +159,15 @@
for (i = 0; i < front_end_node_cnt; i++) {
last_assigned = (last_assigned + 1) % front_end_node_cnt;
front_end_ptr = front_end_nodes + last_assigned;
- if (IS_NODE_DOWN(front_end_ptr) ||
- IS_NODE_DRAIN(front_end_ptr) ||
- IS_NODE_NO_RESPOND(front_end_ptr))
- continue;
+ if (batch_host) { /* Find specific front-end node */
+ if (strcmp(batch_host, front_end_ptr->name))
+ continue;
+ } else { /* Find some usable front-end node */
+ if (IS_NODE_DOWN(front_end_ptr) ||
+ IS_NODE_DRAIN(front_end_ptr) ||
+ IS_NODE_NO_RESPOND(front_end_ptr))
+ continue;
+ }
state_flags = front_end_nodes[last_assigned].node_state &
NODE_STATE_FLAGS;
front_end_nodes[last_assigned].node_state =
@@ -169,7 +175,12 @@
front_end_nodes[last_assigned].job_cnt_run++;
return front_end_ptr;
}
- fatal("assign_front_end: no available front end nodes found");
+ if (batch_host) { /* Find specific front-end node */
+ error("assign_front_end: front end node %s not found",
+ batch_host);
+ } else { /* Find some usable front-end node */
+ error("assign_front_end: no available front end nodes found");
+ }
#endif
return NULL;
}
@@ -259,7 +270,7 @@
}
free(this_node_name);
}
- hostlist_destroy(host_list);
+ hostlist_destroy(host_list);
return rc;
#else
@@ -793,7 +804,8 @@
for (i = 0, front_end_ptr = front_end_nodes;
i < front_end_node_cnt; i++, front_end_ptr++) {
- if (IS_NODE_IDLE(front_end_ptr) &&
+ if ((IS_NODE_IDLE(front_end_ptr) ||
+ IS_NODE_UNKNOWN(front_end_ptr)) &&
(front_end_ptr->job_cnt_run != 0)) {
state_flags = front_end_ptr->node_state &
NODE_STATE_FLAGS;
diff --git a/src/slurmctld/front_end.h b/src/slurmctld/front_end.h
index bee023a..c6f3cc2 100644
--- a/src/slurmctld/front_end.h
+++ b/src/slurmctld/front_end.h
@@ -43,9 +43,10 @@
/*
* assign_front_end - assign a front end node for starting a job
- * RET pointer to the front end node to use or NULL if none available
+ * IN batch_host - previously set batch_host name
+ * RET pointer to the front end node to use or NULL if none found
*/
-extern front_end_record_t *assign_front_end(void);
+extern front_end_record_t *assign_front_end(char *batch_host);
/*
* avail_front_end - test if any front end nodes are available for starting job
diff --git a/src/slurmctld/gang.c b/src/slurmctld/gang.c
index c9c934b3..dac9a35 100644
--- a/src/slurmctld/gang.c
+++ b/src/slurmctld/gang.c
@@ -40,6 +40,10 @@
* gang scheduler plugin for SLURM
*/
+#if defined(__NetBSD__)
+#include <sys/types.h> /* for pid_t */
+#include <sys/signal.h> /* for SIGKILL */
+#endif
#include <pthread.h>
#include <unistd.h>
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 3337b83..c9f3ddf 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -101,7 +101,8 @@
#define JOB_HASH_INX(_job_id) (_job_id % hash_table_size)
/* Change JOB_STATE_VERSION value when changing the state save format */
-#define JOB_STATE_VERSION "VER011"
+#define JOB_STATE_VERSION "VER012"
+#define JOB_2_4_STATE_VERSION "VER012" /* SLURM version 2.4 */
#define JOB_2_3_STATE_VERSION "VER011" /* SLURM version 2.3 */
#define JOB_2_2_STATE_VERSION "VER010" /* SLURM version 2.2 */
#define JOB_2_1_STATE_VERSION "VER009" /* SLURM version 2.1 */
@@ -141,8 +142,8 @@
static void _del_batch_list_rec(void *x);
static void _delete_job_desc_files(uint32_t job_id);
static slurmdb_qos_rec_t *_determine_and_validate_qos(
- slurmdb_association_rec_t *assoc_ptr,
- slurmdb_qos_rec_t *qos_rec, int *error_code);
+ char *resv_name, slurmdb_association_rec_t *assoc_ptr,
+ bool admin, slurmdb_qos_rec_t *qos_rec, int *error_code);
static void _dump_job_details(struct job_details *detail_ptr,
Buf buffer);
static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer);
@@ -181,7 +182,6 @@
static int _resume_job_nodes(struct job_record *job_ptr, bool indf_susp);
static void _send_job_kill(struct job_record *job_ptr);
static void _set_job_id(struct job_record *job_ptr);
-static void _set_job_prio(struct job_record *job_ptr);
static void _signal_batch_job(struct job_record *job_ptr, uint16_t signal);
static void _signal_job(struct job_record *job_ptr, int signal);
static void _suspend_job(struct job_record *job_ptr, uint16_t op);
@@ -319,7 +319,7 @@
static uint32_t _max_switch_wait(uint32_t input_wait)
{
static time_t sched_update = 0;
- static uint32_t max_wait = 60;
+ static uint32_t max_wait = 300; /* default max_switch_wait, seconds */
char *sched_params, *tmp_ptr;
int i;
@@ -345,9 +345,8 @@
}
static slurmdb_qos_rec_t *_determine_and_validate_qos(
- slurmdb_association_rec_t *assoc_ptr,
- slurmdb_qos_rec_t *qos_rec,
- int *error_code)
+ char *resv_name, slurmdb_association_rec_t *assoc_ptr,
+ bool admin, slurmdb_qos_rec_t *qos_rec, int *error_code)
{
slurmdb_qos_rec_t *qos_ptr = NULL;
@@ -355,15 +354,16 @@
with the association. If not just fill in the qos and
continue. */
- if(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)
+ if (accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)
xassert(assoc_ptr);
xassert(qos_rec);
- if(!qos_rec->name && !qos_rec->id) {
- if(assoc_ptr && assoc_ptr->usage->valid_qos)
- if(assoc_ptr->def_qos_id)
+ if (!qos_rec->name && !qos_rec->id) {
+ if (assoc_ptr && assoc_ptr->usage->valid_qos) {
+ if (assoc_ptr->def_qos_id)
qos_rec->id = assoc_ptr->def_qos_id;
- else if(bit_set_count(assoc_ptr->usage->valid_qos) == 1)
+ else if (bit_set_count(assoc_ptr->usage->valid_qos)
+ == 1)
qos_rec->id =
bit_ffs(assoc_ptr->usage->valid_qos);
else if (assoc_mgr_root_assoc
@@ -371,25 +371,25 @@
qos_rec->id = assoc_mgr_root_assoc->def_qos_id;
else
qos_rec->name = "normal";
- else if (assoc_mgr_root_assoc
- && assoc_mgr_root_assoc->def_qos_id)
- qos_rec->id = assoc_mgr_root_assoc->def_qos_id;
+ } else if (assoc_mgr_root_assoc
+ && assoc_mgr_root_assoc->def_qos_id)
+ qos_rec->id = assoc_mgr_root_assoc->def_qos_id;
else
qos_rec->name = "normal";
}
- if(assoc_mgr_fill_in_qos(acct_db_conn, qos_rec, accounting_enforce,
- &qos_ptr)
- != SLURM_SUCCESS) {
+ if (assoc_mgr_fill_in_qos(acct_db_conn, qos_rec, accounting_enforce,
+ &qos_ptr) != SLURM_SUCCESS) {
error("Invalid qos (%s)", qos_rec->name);
*error_code = ESLURM_INVALID_QOS;
return NULL;
}
- if((accounting_enforce & ACCOUNTING_ENFORCE_QOS)
- && assoc_ptr
- && (!assoc_ptr->usage->valid_qos
- || !bit_test(assoc_ptr->usage->valid_qos, qos_rec->id))) {
+ if ((accounting_enforce & ACCOUNTING_ENFORCE_QOS)
+ && assoc_ptr
+ && !admin
+ && (!assoc_ptr->usage->valid_qos
+ || !bit_test(assoc_ptr->usage->valid_qos, qos_rec->id))) {
error("This association %d(account='%s', "
"user='%s', partition='%s') does not have "
"access to qos %s",
@@ -399,6 +399,14 @@
return NULL;
}
+ if (qos_ptr && (qos_ptr->flags & QOS_FLAG_REQ_RESV)
+ && (!resv_name || resv_name[0] == '\0')) {
+ error("qos %s can only be used in a reservation",
+ qos_rec->name);
+ *error_code = ESLURM_INVALID_QOS;
+ return NULL;
+ }
+
*error_code = SLURM_SUCCESS;
return qos_ptr;
}
@@ -617,6 +625,8 @@
if (ver_str) {
if (!strcmp(ver_str, JOB_STATE_VERSION)) {
protocol_version = SLURM_PROTOCOL_VERSION;
+ } else if (!strcmp(ver_str, JOB_2_3_STATE_VERSION)) {
+ protocol_version = SLURM_2_3_PROTOCOL_VERSION;
} else if (!strcmp(ver_str, JOB_2_2_STATE_VERSION)) {
protocol_version = SLURM_2_2_PROTOCOL_VERSION;
} else if (!strcmp(ver_str, JOB_2_1_STATE_VERSION)) {
@@ -634,6 +644,13 @@
}
xfree(ver_str);
+ /* There was a bug in 2.4.0 where the job state version wasn't
+ * incremented correctly. Luckily the node state was. We will
+ * use it to set the version correctly in the job.
+ */
+ if (load_2_4_state && protocol_version == SLURM_2_3_PROTOCOL_VERSION)
+ protocol_version = SLURM_2_4_PROTOCOL_VERSION;
+
safe_unpack_time(&buf_time, buffer);
safe_unpack32( &saved_job_id, buffer);
job_id_sequence = MAX(saved_job_id, job_id_sequence);
@@ -794,7 +811,9 @@
pack16(dump_job_ptr->limit_set_max_nodes, buffer);
pack16(dump_job_ptr->limit_set_min_cpus, buffer);
pack16(dump_job_ptr->limit_set_min_nodes, buffer);
+ pack16(dump_job_ptr->limit_set_pn_min_memory, buffer);
pack16(dump_job_ptr->limit_set_time, buffer);
+ pack16(dump_job_ptr->limit_set_qos, buffer);
packstr(dump_job_ptr->state_desc, buffer);
packstr(dump_job_ptr->resp_host, buffer);
@@ -874,9 +893,10 @@
uint16_t alloc_resp_port, other_port, mail_type, state_reason;
uint16_t restart_cnt, resv_flags, ckpt_interval;
uint16_t wait_all_nodes, warn_signal, warn_time;
- uint16_t limit_set_max_cpus = 0, limit_set_max_nodes = 0,
- limit_set_min_cpus = 0, limit_set_min_nodes = 0,
- limit_set_time = 0;
+ uint16_t limit_set_max_cpus = 0, limit_set_max_nodes = 0;
+ uint16_t limit_set_min_cpus = 0, limit_set_min_nodes = 0;
+ uint16_t limit_set_pn_min_memory = 0;
+ uint16_t limit_set_time = 0, limit_set_qos = 0;
char *nodes = NULL, *partition = NULL, *name = NULL, *resp_host = NULL;
char *account = NULL, *network = NULL, *mail_user = NULL;
char *comment = NULL, *nodes_completing = NULL, *alloc_node = NULL;
@@ -894,7 +914,159 @@
slurmdb_qos_rec_t qos_rec;
bool job_finished = false;
- if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpack32(&assoc_id, buffer);
+ safe_unpack32(&job_id, buffer);
+
+ /* validity test as possible */
+ if (job_id == 0) {
+ verbose("Invalid job_id %u", job_id);
+ goto unpack_error;
+ }
+
+ job_ptr = find_job_record(job_id);
+ if (job_ptr == NULL) {
+ job_ptr = create_job_record(&error_code);
+ if (error_code) {
+ error("Create job entry failed for job_id %u",
+ job_id);
+ goto unpack_error;
+ }
+ job_ptr->job_id = job_id;
+ _add_job_hash(job_ptr);
+ }
+
+ safe_unpack32(&user_id, buffer);
+ safe_unpack32(&group_id, buffer);
+ safe_unpack32(&time_limit, buffer);
+ safe_unpack32(&time_min, buffer);
+ safe_unpack32(&priority, buffer);
+ safe_unpack32(&alloc_sid, buffer);
+ safe_unpack32(&total_cpus, buffer);
+ safe_unpack32(&total_nodes, buffer);
+ safe_unpack32(&cpu_cnt, buffer);
+ safe_unpack32(&exit_code, buffer);
+ safe_unpack32(&derived_ec, buffer);
+ safe_unpack32(&db_index, buffer);
+ safe_unpack32(&resv_id, buffer);
+ safe_unpack32(&next_step_id, buffer);
+ safe_unpack32(&qos_id, buffer);
+ safe_unpack32(&req_switch, buffer);
+ safe_unpack32(&wait4switch, buffer);
+
+ safe_unpack_time(&preempt_time, buffer);
+ safe_unpack_time(&start_time, buffer);
+ safe_unpack_time(&end_time, buffer);
+ safe_unpack_time(&suspend_time, buffer);
+ safe_unpack_time(&pre_sus_time, buffer);
+ safe_unpack_time(&resize_time, buffer);
+ safe_unpack_time(&tot_sus_time, buffer);
+
+ safe_unpack16(&direct_set_prio, buffer);
+ safe_unpack16(&job_state, buffer);
+ safe_unpack16(&kill_on_node_fail, buffer);
+ safe_unpack16(&batch_flag, buffer);
+ safe_unpack16(&mail_type, buffer);
+ safe_unpack16(&state_reason, buffer);
+ safe_unpack16(&restart_cnt, buffer);
+ safe_unpack16(&resv_flags, buffer);
+ safe_unpack16(&wait_all_nodes, buffer);
+ safe_unpack16(&warn_signal, buffer);
+ safe_unpack16(&warn_time, buffer);
+ safe_unpack16(&limit_set_max_cpus, buffer);
+ safe_unpack16(&limit_set_max_nodes, buffer);
+ safe_unpack16(&limit_set_min_cpus, buffer);
+ safe_unpack16(&limit_set_min_nodes, buffer);
+ safe_unpack16(&limit_set_pn_min_memory, buffer);
+ safe_unpack16(&limit_set_time, buffer);
+ safe_unpack16(&limit_set_qos, buffer);
+
+ safe_unpackstr_xmalloc(&state_desc, &name_len, buffer);
+ safe_unpackstr_xmalloc(&resp_host, &name_len, buffer);
+
+ safe_unpack16(&alloc_resp_port, buffer);
+ safe_unpack16(&other_port, buffer);
+
+ if (job_state & JOB_COMPLETING) {
+ safe_unpackstr_xmalloc(&nodes_completing,
+ &name_len, buffer);
+ }
+ safe_unpackstr_xmalloc(&nodes, &name_len, buffer);
+ safe_unpackstr_xmalloc(&partition, &name_len, buffer);
+ if (partition == NULL) {
+ error("No partition for job %u", job_id);
+ goto unpack_error;
+ }
+ part_ptr = find_part_record (partition);
+ if (part_ptr == NULL) {
+ part_ptr_list = get_part_list(partition);
+ if (part_ptr_list)
+ part_ptr = list_peek(part_ptr_list);
+ }
+ if (part_ptr == NULL) {
+ verbose("Invalid partition (%s) for job_id %u",
+ partition, job_id);
+ /* not fatal error, partition could have been removed,
+ * reset_job_bitmaps() will clean-up this job */
+ }
+
+ safe_unpackstr_xmalloc(&name, &name_len, buffer);
+ safe_unpackstr_xmalloc(&wckey, &name_len, buffer);
+ safe_unpackstr_xmalloc(&alloc_node, &name_len, buffer);
+ safe_unpackstr_xmalloc(&account, &name_len, buffer);
+ safe_unpackstr_xmalloc(&comment, &name_len, buffer);
+ safe_unpackstr_xmalloc(&gres, &name_len, buffer);
+ safe_unpackstr_xmalloc(&network, &name_len, buffer);
+ safe_unpackstr_xmalloc(&licenses, &name_len, buffer);
+ safe_unpackstr_xmalloc(&mail_user, &name_len, buffer);
+ safe_unpackstr_xmalloc(&resv_name, &name_len, buffer);
+ safe_unpackstr_xmalloc(&batch_host, &name_len, buffer);
+
+ if (select_g_select_jobinfo_unpack(&select_jobinfo, buffer,
+ protocol_version))
+ goto unpack_error;
+ if (unpack_job_resources(&job_resources, buffer,
+ protocol_version))
+ goto unpack_error;
+
+ safe_unpack16(&ckpt_interval, buffer);
+ if (checkpoint_alloc_jobinfo(&check_job) ||
+ checkpoint_unpack_jobinfo(check_job, buffer,
+ protocol_version))
+ goto unpack_error;
+
+ safe_unpackstr_array(&spank_job_env, &spank_job_env_size,
+ buffer);
+
+ if (gres_plugin_job_state_unpack(&gres_list, buffer, job_id,
+ protocol_version) !=
+ SLURM_SUCCESS)
+ goto unpack_error;
+ gres_plugin_job_state_log(gres_list, job_id);
+
+ safe_unpack16(&details, buffer);
+ if ((details == DETAILS_FLAG) &&
+ (_load_job_details(job_ptr, buffer, protocol_version))) {
+ job_ptr->job_state = JOB_FAILED;
+ job_ptr->exit_code = 1;
+ job_ptr->state_reason = FAIL_SYSTEM;
+ xfree(job_ptr->state_desc);
+ job_ptr->end_time = now;
+ goto unpack_error;
+ }
+ safe_unpack16(&step_flag, buffer);
+
+ while (step_flag == STEP_FLAG) {
+ /* No need to put these into accounting if they
+ * haven't been since all information will be
+ * put in when the job is finished.
+ */
+ if ((error_code = load_step_state(job_ptr, buffer,
+ protocol_version)))
+ goto unpack_error;
+ safe_unpack16(&step_flag, buffer);
+ }
+ } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
safe_unpack32(&assoc_id, buffer);
safe_unpack32(&job_id, buffer);
@@ -1450,6 +1622,8 @@
job_ptr->tot_sus_time = tot_sus_time;
job_ptr->preempt_time = preempt_time;
job_ptr->user_id = user_id;
+ select_g_select_jobinfo_set(job_ptr->select_jobinfo,
+ SELECT_JOBDATA_USER_NAME, &user_id);
job_ptr->wait_all_nodes = wait_all_nodes;
job_ptr->warn_signal = warn_signal;
job_ptr->warn_time = warn_time;
@@ -1457,7 +1631,9 @@
job_ptr->limit_set_max_nodes = limit_set_max_nodes;
job_ptr->limit_set_min_cpus = limit_set_min_cpus;
job_ptr->limit_set_min_nodes = limit_set_min_nodes;
+ job_ptr->limit_set_pn_min_memory = limit_set_pn_min_memory;
job_ptr->limit_set_time = limit_set_time;
+ job_ptr->limit_set_qos = limit_set_qos;
job_ptr->req_switch = req_switch;
job_ptr->wait4switch = wait4switch;
/* This needs to always to initialized to "true". The select
@@ -1473,11 +1649,12 @@
* association record. If not look for it by
* account,partition, user_id.
*/
- if(job_ptr->assoc_id)
+ if (job_ptr->assoc_id)
assoc_rec.id = job_ptr->assoc_id;
else {
assoc_rec.acct = job_ptr->account;
- assoc_rec.partition = job_ptr->partition;
+ if (job_ptr->part_ptr)
+ assoc_rec.partition = job_ptr->part_ptr->name;
assoc_rec.uid = job_ptr->user_id;
}
@@ -1487,16 +1664,9 @@
&job_ptr->assoc_ptr) &&
(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)
&& (!IS_JOB_FINISHED(job_ptr))) {
- info("Cancelling job %u with invalid association",
- job_id);
- job_ptr->job_state = JOB_CANCELLED;
- job_ptr->state_reason = FAIL_ACCOUNT;
+ info("Holding job %u with invalid association", job_id);
xfree(job_ptr->state_desc);
- if (IS_JOB_PENDING(job_ptr))
- job_ptr->start_time = now;
- job_ptr->end_time = now;
- job_completion_logger(job_ptr, false);
- job_finished = 1;
+ job_ptr->state_reason = FAIL_ACCOUNT;
} else {
job_ptr->assoc_id = assoc_rec.id;
info("Recovered job %u %u", job_id, job_ptr->assoc_id);
@@ -1519,25 +1689,22 @@
}
}
- if (!job_finished && job_ptr->qos_id) {
+ if (!job_finished && job_ptr->qos_id &&
+ (job_ptr->state_reason != FAIL_ACCOUNT)) {
memset(&qos_rec, 0, sizeof(slurmdb_qos_rec_t));
qos_rec.id = job_ptr->qos_id;
job_ptr->qos_ptr = _determine_and_validate_qos(
- job_ptr->assoc_ptr, &qos_rec, &qos_error);
- if (qos_error != SLURM_SUCCESS) {
- info("Cancelling job %u with invalid qos", job_id);
- job_ptr->job_state = JOB_CANCELLED;
- job_ptr->state_reason = FAIL_QOS;
+ job_ptr->resv_name, job_ptr->assoc_ptr,
+ job_ptr->limit_set_qos, &qos_rec,
+ &qos_error);
+ if ((qos_error != SLURM_SUCCESS) && !job_ptr->limit_set_qos) {
+ info("Holding job %u with invalid qos", job_id);
xfree(job_ptr->state_desc);
- if (IS_JOB_PENDING(job_ptr))
- job_ptr->start_time = now;
- job_ptr->end_time = now;
- job_completion_logger(job_ptr, false);
- job_finished = 1;
+ job_ptr->state_reason = FAIL_QOS;
}
job_ptr->qos_id = qos_rec.id;
}
- build_node_details(job_ptr); /* set node_addr */
+ build_node_details(job_ptr, false); /* set node_addr */
return SLURM_SUCCESS;
unpack_error:
@@ -2154,7 +2321,7 @@
info("requeue job %u due to failure of node %s",
job_ptr->job_id, node_name);
- _set_job_prio(job_ptr);
+ set_job_prio(job_ptr);
snprintf(requeue_msg, sizeof(requeue_msg),
"Job requeued due to failure "
"of node %s",
@@ -2764,10 +2931,11 @@
int error_code;
bool no_alloc, top_prio, test_only, too_fragmented, independent;
struct job_record *job_ptr;
+ time_t now = time(NULL);
+
error_code = _job_create(job_specs, allocate, will_run,
&job_ptr, submit_uid);
*job_pptr = job_ptr;
- time_t now = time(NULL);
if (error_code) {
if (job_ptr && (immediate || will_run)) {
@@ -2788,7 +2956,7 @@
* job is eligible.
*/
if (job_ptr->priority == NO_VAL)
- _set_job_prio(job_ptr);
+ set_job_prio(job_ptr);
if (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS)
independent = false;
@@ -2866,6 +3034,7 @@
slurm_sched_schedule(); /* work for external scheduler */
}
+ slurmctld_diag_stats.jobs_submitted++;
acct_policy_add_job_submit(job_ptr);
if ((error_code == ESLURM_NODES_BUSY) ||
@@ -2960,11 +3129,13 @@
job_ptr->end_time = now;
last_job_update = now;
job_ptr->job_state = JOB_FAILED | JOB_COMPLETING;
- build_cg_bitmap(job_ptr);
job_ptr->exit_code = 1;
job_ptr->state_reason = FAIL_LAUNCH;
xfree(job_ptr->state_desc);
- deallocate_nodes(job_ptr, false, suspended, false);
+ if (job_ptr->node_bitmap) {
+ build_cg_bitmap(job_ptr);
+ deallocate_nodes(job_ptr, false, suspended, false);
+ }
job_completion_logger(job_ptr, false);
return SLURM_SUCCESS;
}
@@ -3176,12 +3347,14 @@
if (IS_JOB_RUNNING(job_ptr))
job_comp_flag = JOB_COMPLETING;
- else if (IS_JOB_PENDING(job_ptr))
+ else if (IS_JOB_PENDING(job_ptr)) {
+ job_return_code = NO_VAL;
job_ptr->start_time = now;
+ }
if ((job_return_code == NO_VAL) &&
(IS_JOB_RUNNING(job_ptr) || IS_JOB_PENDING(job_ptr))) {
- info("Job %u cancelled from srun", job_ptr->job_id);
+ info("Job %u cancelled from interactive user", job_ptr->job_id);
}
if (IS_JOB_SUSPENDED(job_ptr)) {
@@ -3322,7 +3495,16 @@
{
uint32_t total_nodes;
- if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && (submit_uid != 0)) {
+ if ((part_ptr->flags & PART_FLAG_REQ_RESV) &&
+ (!job_desc->reservation || !strlen(job_desc->reservation))) {
+ info("_part_access_check: uid %u access to partition %s "
+ "denied, requires reservation",
+ (unsigned int) submit_uid, part_ptr->name);
+ return ESLURM_ACCESS_DENIED;
+ }
+
+ if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && (submit_uid != 0) &&
+ (submit_uid != slurmctld_conf.slurm_user_id)) {
info("_part_access_check: uid %u access to partition %s "
"denied, not root",
(unsigned int) submit_uid, part_ptr->name);
@@ -3501,21 +3683,22 @@
job_desc->min_nodes = min_nodes_orig;
}
- if (job_desc->max_nodes == NO_VAL) {
-#ifdef HAVE_BG
- job_desc->max_nodes = min_nodes_orig;
-#else
- ;
-#endif
- } else if (slurmctld_conf.enforce_part_limits &&
- job_desc->max_nodes &&
- (job_desc->max_nodes < min_nodes_orig)) {
+ if ((job_desc->max_nodes != NO_VAL) &&
+ slurmctld_conf.enforce_part_limits &&
+ (job_desc->max_nodes < min_nodes_orig)) {
info("_valid_job_part: job's max nodes less than partition's "
"min nodes (%u < %u)",
job_desc->max_nodes, min_nodes_orig);
rc = ESLURM_INVALID_NODE_COUNT;
goto fini;
}
+#ifndef HAVE_FRONT_END
+ if ((job_desc->min_nodes == 0) && (job_desc->script == NULL)) {
+ info("_valid_job_part: min_nodes==0 for non-batch job");
+ rc = ESLURM_INVALID_NODE_COUNT;
+ goto fini;
+ }
+#endif
if ((job_desc->time_limit == NO_VAL) &&
(part_ptr->default_time != NO_VAL))
@@ -3569,53 +3752,74 @@
struct job_record *job_ptr = NULL;
slurmdb_qos_rec_t *qos_ptr;
slurmdb_association_rec_t *assoc_ptr;
-
-
+ uint32_t job_min_nodes, job_max_nodes;
+ uint32_t part_min_nodes, part_max_nodes;
+#ifdef HAVE_BG
+ static uint16_t cpus_per_node = 0;
+ if (!cpus_per_node)
+ select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
+ &cpus_per_node);
+#endif
job_ptr = *job_pptr;
detail_ptr = job_ptr->details;
part_ptr = job_ptr->part_ptr;
qos_ptr = job_ptr->qos_ptr;
assoc_ptr = job_ptr->assoc_ptr;
+#ifdef HAVE_BG
+ job_min_nodes = detail_ptr->min_cpus / cpus_per_node;
+ job_max_nodes = detail_ptr->max_cpus / cpus_per_node;
+ part_min_nodes = part_ptr->min_nodes_orig;
+ part_max_nodes = part_ptr->max_nodes_orig;
+#else
+ job_min_nodes = detail_ptr->min_nodes;
+ job_max_nodes = detail_ptr->max_nodes;
+ part_min_nodes = part_ptr->min_nodes;
+ part_max_nodes = part_ptr->max_nodes;
+#endif
+
fail_reason = WAIT_NO_REASON;
- if ((detail_ptr->min_nodes > part_ptr->max_nodes) &&
+
+ if ((job_min_nodes > part_max_nodes) &&
(!qos_ptr || (qos_ptr && !(qos_ptr->flags
& QOS_FLAG_PART_MAX_NODE)))) {
- info("Job %u requested too many nodes (%u) of "
- "partition %s(MaxNodes %u)",
- job_ptr->job_id, detail_ptr->min_nodes,
- part_ptr->name, part_ptr->max_nodes);
+ debug2("Job %u requested too many nodes (%u) of "
+ "partition %s(MaxNodes %u)",
+ job_ptr->job_id, job_min_nodes,
+ part_ptr->name, part_max_nodes);
fail_reason = WAIT_PART_NODE_LIMIT;
- } else if ((detail_ptr->max_nodes != 0) && /* no max_nodes for job */
- ((detail_ptr->max_nodes < part_ptr->min_nodes) &&
+ } else if ((job_max_nodes != 0) && /* no max_nodes for job */
+ ((job_max_nodes < part_min_nodes) &&
(!qos_ptr || (qos_ptr && !(qos_ptr->flags &
QOS_FLAG_PART_MIN_NODE))))) {
- info("Job %u requested too few nodes (%u) of "
- "partition %s(MinNodes %u)",
- job_ptr->job_id, detail_ptr->max_nodes,
- part_ptr->name, part_ptr->min_nodes);
+ debug2("Job %u requested too few nodes (%u) of "
+ "partition %s(MinNodes %u)",
+ job_ptr->job_id, job_max_nodes,
+ part_ptr->name, part_min_nodes);
fail_reason = WAIT_PART_NODE_LIMIT;
} else if (part_ptr->state_up == PARTITION_DOWN) {
- info("Job %u requested down partition %s",
- job_ptr->job_id, part_ptr->name);
+ debug2("Job %u requested down partition %s",
+ job_ptr->job_id, part_ptr->name);
fail_reason = WAIT_PART_DOWN;
} else if (part_ptr->state_up == PARTITION_INACTIVE) {
- info("Job %u requested inactive partition %s",
- job_ptr->job_id, part_ptr->name);
+ debug2("Job %u requested inactive partition %s",
+ job_ptr->job_id, part_ptr->name);
fail_reason = WAIT_PART_INACTIVE;
- } else if ((job_ptr->time_limit != NO_VAL) &&
- ((job_ptr->time_limit > part_ptr->max_time) &&
- (!qos_ptr || (qos_ptr && !(qos_ptr->flags &
- QOS_FLAG_PART_TIME_LIMIT))))) {
- info("Job %u exceeds partition time limit", job_ptr->job_id);
+ } else if ((((job_ptr->time_limit != NO_VAL) &&
+ (job_ptr->time_limit > part_ptr->max_time)) ||
+ ((job_ptr->time_min != NO_VAL) &&
+ (job_ptr->time_min > part_ptr->max_time))) &&
+ (!qos_ptr || (qos_ptr && !(qos_ptr->flags &
+ QOS_FLAG_PART_TIME_LIMIT)))) {
+ debug2("Job %u exceeds partition time limit", job_ptr->job_id);
fail_reason = WAIT_PART_TIME_LIMIT;
} else if (qos_ptr && assoc_ptr &&
(qos_ptr->flags & QOS_FLAG_ENFORCE_USAGE_THRES) &&
(!fuzzy_equal(qos_ptr->usage_thres, NO_VAL))) {
- if (!job_ptr->prio_factors)
+ if (!job_ptr->prio_factors) {
job_ptr->prio_factors =
xmalloc(sizeof(priority_factors_object_t));
-
+ }
if (!job_ptr->prio_factors->priority_fs) {
if (fuzzy_equal(assoc_ptr->usage->usage_efctv, NO_VAL))
priority_g_set_assoc_usage(assoc_ptr);
@@ -3625,8 +3829,9 @@
(long double)assoc_ptr->usage->
shares_norm);
}
- if (job_ptr->prio_factors->priority_fs < qos_ptr->usage_thres) {
- info("Job %u exceeds usage threahold", job_ptr->job_id);
+ if (job_ptr->prio_factors->priority_fs < qos_ptr->usage_thres){
+ debug2("Job %u exceeds usage threahold",
+ job_ptr->job_id);
fail_reason = WAIT_QOS_THRES;
}
}
@@ -3662,13 +3867,9 @@
bool valid;
slurmdb_qos_rec_t qos_rec, *qos_ptr;
uint32_t user_submit_priority;
- uint16_t limit_set_max_cpus = 0;
- uint16_t limit_set_max_nodes = 0;
- uint16_t limit_set_min_cpus = 0;
- uint16_t limit_set_min_nodes = 0;
- uint16_t limit_set_time = 0;
static uint32_t node_scaling = 1;
static uint32_t cpus_per_mp = 1;
+ acct_policy_limit_set_t acct_policy_limit_set;
#ifdef HAVE_BG
uint16_t geo[SYSTEM_DIMENSIONS];
@@ -3686,6 +3887,7 @@
sub_mp_system = 1;
}
#endif
+ memset(&acct_policy_limit_set, 0, sizeof(acct_policy_limit_set_t));
*job_pptr = (struct job_record *) NULL;
/*
@@ -3712,7 +3914,8 @@
error_code = ESLURM_INVALID_NODE_NAME;
goto cleanup_fail;
}
- if (job_desc->contiguous)
+ if ((job_desc->contiguous != (uint16_t) NO_VAL) &&
+ (job_desc->contiguous))
bit_fill_gaps(req_bitmap);
i = bit_set_count(req_bitmap);
if (i > job_desc->min_nodes)
@@ -3731,6 +3934,15 @@
#endif
}
}
+#ifdef HAVE_CRAY
+ if ((job_desc->max_nodes == 0) && (job_desc->script == NULL)) {
+#else
+ if (job_desc->max_nodes == 0) {
+#endif
+ info("_job_create: max_nodes == 0");
+ error_code = ESLURM_INVALID_NODE_COUNT;
+ goto cleanup_fail;
+ }
error_code = _valid_job_part(job_desc, submit_uid, req_bitmap,
&part_ptr, &part_ptr_list);
@@ -3749,9 +3961,9 @@
}
memset(&assoc_rec, 0, sizeof(slurmdb_association_rec_t));
- assoc_rec.uid = job_desc->user_id;
- assoc_rec.partition = part_ptr->name;
assoc_rec.acct = job_desc->account;
+ assoc_rec.partition = part_ptr->name;
+ assoc_rec.uid = job_desc->user_id;
if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
accounting_enforce, &assoc_ptr)) {
@@ -3791,7 +4003,9 @@
qos_rec.name = "standby";
}
- qos_ptr = _determine_and_validate_qos(assoc_ptr, &qos_rec, &qos_error);
+ qos_ptr = _determine_and_validate_qos(
+ job_desc->reservation, assoc_ptr, false, &qos_rec, &qos_error);
+
if (qos_error != SLURM_SUCCESS) {
error_code = qos_error;
goto cleanup_fail;
@@ -3799,11 +4013,9 @@
if ((accounting_enforce & ACCOUNTING_ENFORCE_LIMITS) &&
(!acct_policy_validate(job_desc, part_ptr,
- assoc_ptr, qos_ptr,
- &limit_set_max_cpus,
- &limit_set_max_nodes,
- &limit_set_time, 0))) {
- info("_job_create: exceeded association's node or time limit "
+ assoc_ptr, qos_ptr, NULL,
+ &acct_policy_limit_set, 0))) {
+ info("_job_create: exceeded association/qos's limit "
"for user %u", job_desc->user_id);
error_code = ESLURM_ACCOUNTING_POLICY;
goto cleanup_fail;
@@ -3816,7 +4028,12 @@
debug3("before alteration asking for nodes %u-%u cpus %u-%u",
job_desc->min_nodes, job_desc->max_nodes,
job_desc->min_cpus, job_desc->max_cpus);
- select_g_alter_node_cnt(SELECT_SET_NODE_CNT, job_desc);
+ if (select_g_alter_node_cnt(SELECT_SET_NODE_CNT, job_desc)
+ != SLURM_SUCCESS) {
+ error_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
+ goto cleanup_fail;
+ }
+
debug3("after alteration asking for nodes %u-%u cpus %u-%u",
job_desc->min_nodes, job_desc->max_nodes,
job_desc->min_cpus, job_desc->max_cpus);
@@ -3885,17 +4102,14 @@
}
select_g_select_jobinfo_get(job_desc->select_jobinfo,
SELECT_JOBDATA_CONN_TYPE, &conn_type);
- if (conn_type[0] == (uint16_t) NO_VAL) {
- conn_type[0] = (uint16_t) SELECT_NAV;
- select_g_select_jobinfo_set(job_desc->select_jobinfo,
- SELECT_JOBDATA_CONN_TYPE,
- &conn_type);
- } else if(((conn_type[0] >= SELECT_SMALL)
- && ((job_desc->min_cpus >= cpus_per_mp) && !sub_mp_system))
- || (!sub_mp_system
- && ((conn_type[0] == SELECT_TORUS)
- || (conn_type[0] == SELECT_MESH))
- && (job_desc->min_cpus < cpus_per_mp))) {
+
+ if ((conn_type[0] != (uint16_t) NO_VAL)
+ && (((conn_type[0] >= SELECT_SMALL)
+ && ((job_desc->min_cpus >= cpus_per_mp) && !sub_mp_system))
+ || (!sub_mp_system
+ && ((conn_type[0] == SELECT_TORUS)
+ || (conn_type[0] == SELECT_MESH))
+ && (job_desc->min_cpus < cpus_per_mp)))) {
/* check to make sure we have a valid conn_type with
* the cpu count */
info("Job's cpu count at %u makes our conn_type "
@@ -3903,15 +4117,17 @@
job_desc->min_cpus, conn_type_string(conn_type[0]));
error_code = ESLURM_INVALID_NODE_COUNT;
goto cleanup_fail;
- } else if ((conn_type[0] == SELECT_TORUS)
- || (conn_type[0] == SELECT_MESH)) {
- int dim;
- for (dim=1; dim<SYSTEM_DIMENSIONS; dim++)
- conn_type[dim] = conn_type[0];
- select_g_select_jobinfo_set(job_desc->select_jobinfo,
- SELECT_JOBDATA_CONN_TYPE,
- &conn_type);
}
+
+ /* make sure we reset all the NO_VAL's to NAV's */
+ for (i=0; i<SYSTEM_DIMENSIONS; i++) {
+ if (conn_type[i] == (uint16_t)NO_VAL)
+ conn_type[i] = SELECT_NAV;
+ }
+ select_g_select_jobinfo_set(job_desc->select_jobinfo,
+ SELECT_JOBDATA_CONN_TYPE,
+ &conn_type);
+
#endif
if (job_desc->max_nodes == NO_VAL)
@@ -3953,11 +4169,13 @@
goto cleanup_fail;
}
- job_ptr->limit_set_max_cpus = limit_set_max_cpus;
- job_ptr->limit_set_max_nodes = limit_set_max_nodes;
- job_ptr->limit_set_min_cpus = limit_set_min_cpus;
- job_ptr->limit_set_min_nodes = limit_set_min_nodes;
- job_ptr->limit_set_time = limit_set_time;
+ job_ptr->limit_set_max_cpus = acct_policy_limit_set.max_cpus;
+ job_ptr->limit_set_max_nodes = acct_policy_limit_set.max_nodes;
+ job_ptr->limit_set_min_cpus = acct_policy_limit_set.min_cpus;
+ job_ptr->limit_set_min_nodes = acct_policy_limit_set.min_nodes;
+ job_ptr->limit_set_pn_min_memory = acct_policy_limit_set.pn_min_memory;
+ job_ptr->limit_set_time = acct_policy_limit_set.time;
+ job_ptr->limit_set_qos = acct_policy_limit_set.qos;
job_ptr->assoc_id = assoc_rec.id;
job_ptr->assoc_ptr = (void *) assoc_ptr;
@@ -4035,7 +4253,6 @@
error_code = ESLURM_QOS_THRES;
else
error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
- job_ptr->priority = 1; /* Move to end of queue */
job_ptr->state_reason = fail_reason;
xfree(job_ptr->state_desc);
}
@@ -4181,8 +4398,10 @@
return ESLURM_WRITING_TO_FILE;
}
- if (data == NULL)
+ if (data == NULL) {
+ close(fd);
return SLURM_SUCCESS;
+ }
for (i = 0; i < size; i++) {
nwrite = strlen(data[i]) + 1;
@@ -4326,6 +4545,7 @@
if (rec_cnt == 0) {
*data = NULL;
*size = 0;
+ close(fd);
return;
}
@@ -4696,7 +4916,9 @@
detail_ptr->begin_time = job_desc->begin_time;
job_ptr->select_jobinfo =
select_g_select_jobinfo_copy(job_desc->select_jobinfo);
-
+ select_g_select_jobinfo_set(job_ptr->select_jobinfo,
+ SELECT_JOBDATA_USER_NAME,
+ &job_ptr->user_id);
if (job_desc->ckpt_dir)
detail_ptr->ckpt_dir = xstrdup(job_desc->ckpt_dir);
else
@@ -4814,8 +5036,8 @@
ListIterator job_iterator;
struct job_record *job_ptr;
time_t now = time(NULL);
- time_t old = now - (slurmctld_conf.inactive_limit * 4 / 3) +
- slurmctld_conf.msg_timeout + 1;
+ time_t old = now - ((slurmctld_conf.inactive_limit * 4 / 3) +
+ slurmctld_conf.msg_timeout + 1);
time_t over_run;
int resv_status = 0;
uint64_t job_cpu_usage_mins = 0;
@@ -4853,14 +5075,6 @@
* running, suspended and pending job */
resv_status = job_resv_check(job_ptr);
- if ((job_ptr->priority == 1) && (!IS_JOB_FINISHED(job_ptr))) {
- /* Rather than resetting job priorities whenever a
- * DOWN, DRAINED or non-responsive node is returned to
- * service, we pick them up here. There will be a small
- * delay in restting a job's priority, but the code is
- * a lot cleaner this way. */
- _set_job_prio(job_ptr);
- }
if (!IS_JOB_RUNNING(job_ptr))
continue;
@@ -5237,6 +5451,7 @@
delete_job_details(job_ptr);
xfree(job_ptr->account);
+ xfree(job_ptr->alias_list);
xfree(job_ptr->alloc_node);
xfree(job_ptr->batch_host);
xfree(job_ptr->comment);
@@ -5245,7 +5460,6 @@
xfree(job_ptr->licenses);
if (job_ptr->license_list)
list_destroy(job_ptr->license_list);
- job_ptr->magic = 0;
xfree(job_ptr->mail_user);
xfree(job_ptr->name);
xfree(job_ptr->network);
@@ -5300,6 +5514,7 @@
{
time_t kill_age, min_age, now = time(NULL);;
struct job_record *job_ptr = (struct job_record *)job_entry;
+ void *block_in_use = NULL;
if (IS_JOB_COMPLETING(job_ptr)) {
kill_age = now - (slurmctld_conf.kill_wait +
@@ -5321,6 +5536,12 @@
if (!(IS_JOB_FINISHED(job_ptr)))
return 0; /* Job still active */
+ select_g_select_jobinfo_get(job_ptr->select_jobinfo,
+ SELECT_JOBDATA_BLOCK_PTR,
+ &block_in_use);
+ if (block_in_use)
+ return 0; /* Job hasn't finished on block yet */
+
/* If we don't have a db_index by now and we are running with
the slurmdbd lets put it on the list to be handled later
when it comes back up since we won't get another chance.
@@ -5540,7 +5761,7 @@
packstr(dump_job_ptr->gres, buffer);
packstr(dump_job_ptr->batch_host, buffer);
if (!IS_JOB_COMPLETED(dump_job_ptr) &&
- (show_flags & SHOW_DETAIL) &&
+ (show_flags & SHOW_DETAIL2) &&
((dump_job_ptr->user_id == (uint32_t) uid) ||
validate_slurm_user(uid))) {
char *batch_script = get_job_script(dump_job_ptr);
@@ -5577,7 +5798,7 @@
packstr(dump_job_ptr->wckey, buffer);
pack32(dump_job_ptr->req_switch, buffer);
pack32(dump_job_ptr->wait4switch, buffer);
-
+
packstr(dump_job_ptr->alloc_node, buffer);
if (!IS_JOB_COMPLETING(dump_job_ptr))
pack_bit_fmt(dump_job_ptr->node_bitmap, buffer);
@@ -6129,7 +6350,7 @@
job_fail = true;
}
_reset_step_bitmaps(job_ptr);
- build_node_details(job_ptr); /* set node_addr */
+ build_node_details(job_ptr, false); /* set node_addr */
if (_reset_detail_bitmaps(job_ptr))
job_fail = true;
@@ -6290,11 +6511,10 @@
/*
- * _set_job_prio - set a default job priority
+ * set_job_prio - set a default job priority
* IN job_ptr - pointer to the job_record
- * NOTE: this is a simple prototype, we need to re-establish value on restart
*/
-static void _set_job_prio(struct job_record *job_ptr)
+extern void set_job_prio(struct job_record *job_ptr)
{
xassert(job_ptr);
xassert (job_ptr->magic == JOB_MAGIC);
@@ -6326,34 +6546,13 @@
job_iterator = list_iterator_create(job_list);
while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
- if (job_ptr->priority > 1)
+ if (job_ptr->priority)
job_ptr->priority += prio_boost;
}
list_iterator_destroy(job_iterator);
lowest_prio += prio_boost;
}
-
-/* After a node is returned to service, reset the priority of jobs
- * which may have been held due to that node being unavailable */
-extern void reset_job_priority(void)
-{
- ListIterator job_iterator;
- struct job_record *job_ptr;
- int count = 0;
-
- job_iterator = list_iterator_create(job_list);
- while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
- if ((job_ptr->priority == 1) && (!IS_JOB_FINISHED(job_ptr))) {
- _set_job_prio(job_ptr);
- count++;
- }
- }
- list_iterator_destroy(job_iterator);
- if (count)
- last_job_update = time(NULL);
-}
-
/*
* _top_priority - determine if any other job has a higher priority than the
* specified job
@@ -6374,7 +6573,7 @@
* execute on different sets of nodes. While sched/backfill would
* eventually start the job if delayed here based upon priority,
* that could delay the initiation of a job by a few seconds. */
- if(static_part == (uint16_t)NO_VAL) {
+ if (static_part == (uint16_t)NO_VAL) {
/* Since this never changes we can just set it once
and not look at it again. */
rc = select_g_get_info_from_plugin(SELECT_STATIC_PART, job_ptr,
@@ -6403,7 +6602,10 @@
* indicative of job requeue */
continue;
}
- if (!job_independent(job_ptr2, 0))
+ if (!acct_policy_job_runnable_state(job_ptr2) ||
+ !misc_policy_job_runnable_state(job_ptr2) ||
+ !part_policy_job_runnable_state(job_ptr2) ||
+ !job_independent(job_ptr2, 0))
continue;
if ((job_ptr2->resv_name && (!job_ptr->resv_name)) ||
((!job_ptr2->resv_name) && job_ptr->resv_name))
@@ -6446,7 +6648,7 @@
job_ptr->state_reason = WAIT_HELD;
xfree(job_ptr->state_desc);
}
- } else if (job_ptr->priority != 1) { /* not system hold */
+ } else if (job_ptr->state_reason == WAIT_NO_REASON) {
job_ptr->state_reason = WAIT_PRIORITY;
xfree(job_ptr->state_desc);
}
@@ -6494,7 +6696,7 @@
{
int error_code = SLURM_SUCCESS;
enum job_state_reason fail_reason;
- bool authorized = false;
+ bool authorized = false, admin = false;
uint32_t save_min_nodes = 0, save_max_nodes = 0;
uint32_t save_min_cpus = 0, save_max_cpus = 0;
struct job_record *job_ptr;
@@ -6504,11 +6706,7 @@
time_t now = time(NULL);
multi_core_data_t *mc_ptr = NULL;
bool update_accounting = false;
- uint16_t limit_set_max_cpus = 0;
- uint16_t limit_set_max_nodes = 0;
- uint16_t limit_set_min_cpus = 0;
- uint16_t limit_set_min_nodes = 0;
- uint16_t limit_set_time = 0;
+ acct_policy_limit_set_t acct_policy_limit_set;
#ifdef HAVE_BG
uint16_t conn_type = (uint16_t) NO_VAL;
@@ -6525,6 +6723,7 @@
select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
&cpus_per_node);
#endif
+ memset(&acct_policy_limit_set, 0, sizeof(acct_policy_limit_set_t));
/* Make sure anything that may be put in the database will be
lower case */
@@ -6543,7 +6742,8 @@
if (error_code != SLURM_SUCCESS)
return error_code;
- authorized = validate_operator(uid) || assoc_mgr_is_user_acct_coord(
+ admin = validate_operator(uid);
+ authorized = admin || assoc_mgr_is_user_acct_coord(
acct_db_conn, uid, job_ptr->account);
if ((job_ptr->user_id != uid) && !authorized) {
error("Security violation, JOB_UPDATE RPC from uid %d",
@@ -6718,25 +6918,55 @@
if (job_specs->partition) {
List part_ptr_list = NULL;
+ bool old_res = false;
if (!IS_JOB_PENDING(job_ptr)) {
error_code = ESLURM_DISABLED;
goto fini;
}
- if (job_specs->min_nodes == NO_VAL)
+ if (job_specs->min_nodes == NO_VAL) {
+#ifdef HAVE_BG
+ select_g_select_jobinfo_get(
+ job_ptr->select_jobinfo,
+ SELECT_JOBDATA_NODE_CNT,
+ &job_specs->min_nodes);
+#else
job_specs->min_nodes = detail_ptr->min_nodes;
+#endif
+ }
if ((job_specs->max_nodes == NO_VAL) &&
- (detail_ptr->max_nodes != 0))
+ (detail_ptr->max_nodes != 0)) {
+#ifdef HAVE_BG
+ select_g_select_jobinfo_get(
+ job_ptr->select_jobinfo,
+ SELECT_JOBDATA_NODE_CNT,
+ &job_specs->max_nodes);
+#else
job_specs->max_nodes = detail_ptr->max_nodes;
+#endif
+ }
+
if ((job_specs->time_min == NO_VAL) &&
(job_ptr->time_min != 0))
job_specs->time_min = job_ptr->time_min;
if (job_specs->time_limit == NO_VAL)
job_specs->time_limit = job_ptr->time_limit;
+ if (!job_specs->reservation
+ || job_specs->reservation[0] == '\0') {
+ /* just incase the reservation is '\0' */
+ xfree(job_specs->reservation);
+ job_specs->reservation = job_ptr->resv_name;
+ old_res = true;
+ }
+
error_code = _valid_job_part(job_specs, uid,
job_ptr->details->req_node_bitmap,
&tmp_part_ptr, &part_ptr_list);
+
+ if (old_res)
+ job_specs->reservation = NULL;
+
if (error_code != SLURM_SUCCESS)
;
else if ((tmp_part_ptr->state_up & PARTITION_SUBMIT) == 0)
@@ -6745,9 +6975,9 @@
slurmdb_association_rec_t assoc_rec;
memset(&assoc_rec, 0,
sizeof(slurmdb_association_rec_t));
- assoc_rec.uid = job_ptr->user_id;
- assoc_rec.partition = tmp_part_ptr->name;
assoc_rec.acct = job_ptr->account;
+ assoc_rec.partition = tmp_part_ptr->name;
+ assoc_rec.uid = job_ptr->user_id;
if (assoc_mgr_fill_in_assoc(
acct_db_conn, &assoc_rec,
accounting_enforce,
@@ -6793,10 +7023,18 @@
job_ptr->comment, job_specs->job_id);
if (wiki_sched && strstr(job_ptr->comment, "QOS:")) {
- slurmdb_qos_rec_t qos_rec;
if (!IS_JOB_PENDING(job_ptr))
error_code = ESLURM_DISABLED;
else {
+ slurmdb_qos_rec_t qos_rec;
+ slurmdb_qos_rec_t *new_qos_ptr;
+ char *resv_name;
+ if (job_specs->reservation
+ && job_specs->reservation[0] != '\0')
+ resv_name = job_specs->reservation;
+ else
+ resv_name = job_ptr->resv_name;
+
memset(&qos_rec, 0, sizeof(slurmdb_qos_rec_t));
if (strstr(job_ptr->comment,
"FLAGS:PREEMPTOR"))
@@ -6805,12 +7043,24 @@
"FLAGS:PREEMPTEE"))
qos_rec.name = "standby";
- job_ptr->qos_ptr = _determine_and_validate_qos(
- job_ptr->assoc_ptr, &qos_rec,
- &error_code);
+ new_qos_ptr = _determine_and_validate_qos(
+ resv_name, job_ptr->assoc_ptr,
+ authorized, &qos_rec, &error_code);
if (error_code == SLURM_SUCCESS) {
- job_ptr->qos_id = qos_rec.id;
- update_accounting = true;
+ info("update_job: setting qos to %s "
+ "for job_id %u",
+ job_specs->qos, job_specs->job_id);
+ if (job_ptr->qos_id != qos_rec.id) {
+ job_ptr->qos_id = qos_rec.id;
+ job_ptr->qos_ptr = new_qos_ptr;
+ if (authorized)
+ job_ptr->limit_set_qos =
+ ADMIN_SET_LIMIT;
+ else
+ job_ptr->limit_set_qos
+ = 0;
+ update_accounting = true;
+ }
}
}
}
@@ -6819,21 +7069,39 @@
goto fini;
if (job_specs->qos) {
- slurmdb_qos_rec_t qos_rec;
if (!IS_JOB_PENDING(job_ptr))
error_code = ESLURM_DISABLED;
else {
- info("update_job: setting qos to %s for job_id %u",
- job_specs->qos, job_specs->job_id);
+ slurmdb_qos_rec_t qos_rec;
+ slurmdb_qos_rec_t *new_qos_ptr;
+ char *resv_name;
+
+ if (job_specs->reservation
+ && job_specs->reservation[0] != '\0')
+ resv_name = job_specs->reservation;
+ else
+ resv_name = job_ptr->resv_name;
memset(&qos_rec, 0, sizeof(slurmdb_qos_rec_t));
qos_rec.name = job_specs->qos;
- job_ptr->qos_ptr = _determine_and_validate_qos(
- job_ptr->assoc_ptr, &qos_rec, &error_code);
+ new_qos_ptr = _determine_and_validate_qos(
+ resv_name, job_ptr->assoc_ptr,
+ authorized, &qos_rec, &error_code);
if (error_code == SLURM_SUCCESS) {
- job_ptr->qos_id = qos_rec.id;
- update_accounting = true;
+ info("update_job: setting qos to %s "
+ "for job_id %u",
+ job_specs->qos, job_specs->job_id);
+ if (job_ptr->qos_id != qos_rec.id) {
+ job_ptr->qos_id = qos_rec.id;
+ job_ptr->qos_ptr = new_qos_ptr;
+ if (authorized)
+ job_ptr->limit_set_qos =
+ ADMIN_SET_LIMIT;
+ else
+ job_ptr->limit_set_qos = 0;
+ update_accounting = true;
+ }
}
}
}
@@ -6843,11 +7111,10 @@
if (!authorized && (accounting_enforce & ACCOUNTING_ENFORCE_LIMITS)) {
if (!acct_policy_validate(job_specs, job_ptr->part_ptr,
job_ptr->assoc_ptr, job_ptr->qos_ptr,
- &limit_set_max_cpus,
- &limit_set_max_nodes,
- &limit_set_time, 1)) {
- info("update_job: exceeded association's cpu, node or "
- "time limit for user %u", job_specs->user_id);
+ NULL, &acct_policy_limit_set, 1)) {
+ info("update_job: exceeded association's cpu, node, "
+ "memory or time limit for user %u",
+ job_specs->user_id);
error_code = ESLURM_ACCOUNTING_POLICY;
goto fini;
}
@@ -6855,27 +7122,34 @@
/* Perhaps the limit was removed, so we will remove it
since it was imposed previously.
*/
- if (!limit_set_max_cpus && (job_ptr->limit_set_max_cpus == 1))
+ if (!acct_policy_limit_set.max_cpus
+ && (job_ptr->limit_set_max_cpus == 1))
job_ptr->details->max_cpus = NO_VAL;
- if (!limit_set_max_nodes && (job_ptr->limit_set_max_nodes == 1))
+ if (!acct_policy_limit_set.max_nodes
+ && (job_ptr->limit_set_max_nodes == 1))
job_ptr->details->max_nodes = NO_VAL;
- if (!limit_set_time && (job_ptr->limit_set_time == 1))
+ if (!acct_policy_limit_set.time
+ && (job_ptr->limit_set_time == 1))
job_ptr->time_limit = NO_VAL;
if (job_ptr->limit_set_max_cpus != ADMIN_SET_LIMIT)
- job_ptr->limit_set_max_cpus = limit_set_max_cpus;
+ job_ptr->limit_set_max_cpus =
+ acct_policy_limit_set.max_cpus;
if (job_ptr->limit_set_max_nodes != ADMIN_SET_LIMIT)
- job_ptr->limit_set_max_nodes = limit_set_max_nodes;
+ job_ptr->limit_set_max_nodes =
+ acct_policy_limit_set.max_nodes;
if (job_ptr->limit_set_time != ADMIN_SET_LIMIT)
- job_ptr->limit_set_time = limit_set_time;
+ job_ptr->limit_set_time = acct_policy_limit_set.time;
} else if (authorized) {
- limit_set_max_cpus = ADMIN_SET_LIMIT;
- limit_set_max_nodes = ADMIN_SET_LIMIT;
- limit_set_min_cpus = ADMIN_SET_LIMIT;
- limit_set_min_nodes = ADMIN_SET_LIMIT;
- limit_set_time = ADMIN_SET_LIMIT;
+ acct_policy_limit_set.max_cpus = ADMIN_SET_LIMIT;
+ acct_policy_limit_set.max_nodes = ADMIN_SET_LIMIT;
+ acct_policy_limit_set.min_cpus = ADMIN_SET_LIMIT;
+ acct_policy_limit_set.min_nodes = ADMIN_SET_LIMIT;
+ acct_policy_limit_set.pn_min_memory = ADMIN_SET_LIMIT;
+ acct_policy_limit_set.time = ADMIN_SET_LIMIT;
+ acct_policy_limit_set.qos = ADMIN_SET_LIMIT;
}
@@ -6885,7 +7159,11 @@
debug3("update before alteration asking for nodes %u-%u cpus %u-%u",
job_specs->min_nodes, job_specs->max_nodes,
job_specs->min_cpus, job_specs->max_cpus);
- select_g_alter_node_cnt(SELECT_SET_NODE_CNT, job_specs);
+ if (select_g_alter_node_cnt(SELECT_SET_NODE_CNT, job_specs)
+ != SLURM_SUCCESS) {
+ error_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
+ goto fini;
+ }
debug3("update after alteration asking for nodes %u-%u cpus %u-%u",
job_specs->min_nodes, job_specs->max_nodes,
job_specs->min_cpus, job_specs->max_cpus);
@@ -6942,16 +7220,16 @@
info("update_job: setting min_cpus from "
"%u to %u for job_id %u",
save_min_cpus, detail_ptr->min_cpus, job_specs->job_id);
- job_ptr->limit_set_min_cpus = limit_set_min_cpus;
+ job_ptr->limit_set_min_cpus = acct_policy_limit_set.min_cpus;
update_accounting = true;
}
if (save_max_cpus && (detail_ptr->max_cpus != save_max_cpus)) {
info("update_job: setting max_cpus from "
"%u to %u for job_id %u",
save_max_cpus, detail_ptr->max_cpus, job_specs->job_id);
- /* Always use the limit_set_* since if set by a
+ /* Always use the acct_policy_limit_set.* since if set by a
* super user it be set correctly */
- job_ptr->limit_set_max_cpus = limit_set_max_cpus;
+ job_ptr->limit_set_max_cpus = acct_policy_limit_set.max_cpus;
update_accounting = true;
}
@@ -7007,13 +7285,14 @@
"min_cpus to %u for "
"job_id %u", detail_ptr->min_cpus,
job_specs->job_id);
- /* Always use the limit_set_*
+ /* Always use the
+ * acct_policy_limit_set.*
* since if set by a
* super user it be set correctly */
job_ptr->limit_set_min_cpus =
- limit_set_min_cpus;
+ acct_policy_limit_set.min_cpus;
job_ptr->limit_set_max_cpus =
- limit_set_max_cpus;
+ acct_policy_limit_set.max_cpus;
}
}
}
@@ -7067,16 +7346,16 @@
info("update_job: setting min_nodes from "
"%u to %u for job_id %u",
save_min_nodes, detail_ptr->min_nodes, job_specs->job_id);
- job_ptr->limit_set_min_nodes = limit_set_min_nodes;
+ job_ptr->limit_set_min_nodes = acct_policy_limit_set.min_nodes;
update_accounting = true;
}
if (save_max_nodes && (save_max_nodes != detail_ptr->max_nodes)) {
info("update_job: setting max_nodes from "
"%u to %u for job_id %u",
save_max_nodes, detail_ptr->max_nodes, job_specs->job_id);
- /* Always use the limit_set_* since if set by a
+ /* Always use the acct_policy_limit_set.* since if set by a
* super user it be set correctly */
- job_ptr->limit_set_max_nodes = limit_set_max_nodes;
+ job_ptr->limit_set_max_nodes = acct_policy_limit_set.max_nodes;
update_accounting = true;
}
@@ -7115,9 +7394,9 @@
info("sched: update_job: setting time_limit to %u for "
"job_id %u", job_specs->time_limit,
job_specs->job_id);
- /* Always use the limit_set_* since if set by a
- * super user it be set correctly */
- job_ptr->limit_set_time = limit_set_time;
+ /* Always use the acct_policy_limit_set.*
+ * since if set by a super user it be set correctly */
+ job_ptr->limit_set_time = acct_policy_limit_set.time;
update_accounting = true;
} else if (IS_JOB_PENDING(job_ptr) && job_ptr->part_ptr &&
(job_ptr->part_ptr->max_time >=
@@ -7126,9 +7405,9 @@
info("sched: update_job: setting time_limit to %u for "
"job_id %u", job_specs->time_limit,
job_specs->job_id);
- /* Always use the limit_set_* since if set by a
- * super user it be set correctly */
- job_ptr->limit_set_time = limit_set_time;
+ /* Always use the acct_policy_limit_set.*
+ * since if set by a super user it be set correctly */
+ job_ptr->limit_set_time = acct_policy_limit_set.time;
update_accounting = true;
} else {
info("sched: Attempt to increase time limit for job %u",
@@ -7170,9 +7449,9 @@
info("sched: update_job: setting time_limit to %u for "
"job_id %u", job_ptr->time_limit,
job_specs->job_id);
- /* Always use the limit_set_* since if set by a
- * super user it be set correctly */
- job_ptr->limit_set_time = limit_set_time;
+ /* Always use the acct_policy_limit_set.*
+ * since if set by a super user it be set correctly */
+ job_ptr->limit_set_time = acct_policy_limit_set.time;
update_accounting = true;
} else {
info("sched: Attempt to extend end time for job %u",
@@ -7183,15 +7462,37 @@
if (error_code != SLURM_SUCCESS)
goto fini;
+ /* this needs to be after partition and qos checks */
if (job_specs->reservation) {
if (!IS_JOB_PENDING(job_ptr) && !IS_JOB_RUNNING(job_ptr)) {
error_code = ESLURM_DISABLED;
} else {
int rc;
char *save_resv_name = job_ptr->resv_name;
+ slurmctld_resv_t *save_resv_ptr = job_ptr->resv_ptr;
+
job_ptr->resv_name = job_specs->reservation;
job_specs->reservation = NULL; /* Nothing to free */
rc = validate_job_resv(job_ptr);
+ /* Make sure this job isn't using a partition
+ or qos that requires it to be in a
+ reservation.
+ */
+ if (rc == SLURM_SUCCESS && !job_ptr->resv_name) {
+ struct part_record *part_ptr =
+ job_ptr->part_ptr;
+ slurmdb_qos_rec_t *qos_ptr =
+ (slurmdb_qos_rec_t *)job_ptr->qos_ptr;
+
+ if (part_ptr
+ && part_ptr->flags & PART_FLAG_REQ_RESV)
+ rc = ESLURM_ACCESS_DENIED;
+
+ if (qos_ptr
+ && qos_ptr->flags & QOS_FLAG_REQ_RESV)
+ rc = ESLURM_INVALID_QOS;
+ }
+
if (rc == SLURM_SUCCESS) {
info("sched: update_job: setting reservation "
"to %s for job_id %u", job_ptr->resv_name,
@@ -7202,6 +7503,7 @@
/* Restore reservation info */
job_specs->reservation = job_ptr->resv_name;
job_ptr->resv_name = save_resv_name;
+ job_ptr->resv_ptr = save_resv_ptr;
error_code = rc;
}
}
@@ -7236,7 +7538,7 @@
} else if ((job_ptr->priority == 0) &&
(job_ptr->state_reason == WAIT_HELD_USER)) {
job_ptr->direct_set_prio = 0;
- _set_job_prio(job_ptr);
+ set_job_prio(job_ptr);
info("sched: update_job: releasing user hold "
"for job_id %u", job_specs->job_id);
job_ptr->state_reason = WAIT_NO_REASON;
@@ -7247,7 +7549,7 @@
job_ptr->details->nice = NICE_OFFSET;
if (job_specs->priority == INFINITE) {
job_ptr->direct_set_prio = 0;
- _set_job_prio(job_ptr);
+ set_job_prio(job_ptr);
} else {
job_ptr->direct_set_prio = 1;
job_ptr->priority = job_specs->priority;
@@ -7261,7 +7563,7 @@
(job_specs->alloc_sid ==
ALLOC_SID_USER_HOLD)) {
job_ptr->state_reason = WAIT_HELD_USER;
- } else
+ } else
job_ptr->state_reason = WAIT_HELD;
xfree(job_ptr->state_desc);
} else if ((job_ptr->state_reason == WAIT_HELD) ||
@@ -7303,6 +7605,10 @@
if (job_specs->pn_min_memory != NO_VAL) {
if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL))
error_code = ESLURM_DISABLED;
+ else if (job_specs->pn_min_memory
+ == detail_ptr->pn_min_memory)
+ debug("sched: update_job: new memory limit identical "
+ "to old limit for job %u", job_specs->job_id);
else if (authorized) {
char *entity;
if (job_specs->pn_min_memory & MEM_PER_CPU)
@@ -7315,6 +7621,10 @@
"for job_id %u", entity,
(job_specs->pn_min_memory & (~MEM_PER_CPU)),
job_specs->job_id);
+ /* Always use the acct_policy_limit_set.*
+ * since if set by a super user it be set correctly */
+ job_ptr->limit_set_pn_min_memory =
+ acct_policy_limit_set.pn_min_memory;
} else {
error("sched: Attempt to increase pn_min_memory for "
"job %u", job_specs->job_id);
@@ -7581,7 +7891,7 @@
if (error_code)
goto fini;
} else if ((job_specs->min_nodes == 0) ||
- (job_specs->min_nodes > job_ptr->node_cnt) ||
+ (job_specs->min_nodes > job_ptr->node_cnt) ||
job_ptr->details->expanding_jobid) {
info("sched: Invalid node count (%u) for job %u update",
job_specs->min_nodes, job_specs->job_id);
@@ -7671,7 +7981,7 @@
update_accounting = true;
if ((job_ptr->priority == 1) &&
(detail_ptr->begin_time <= now))
- _set_job_prio(job_ptr);
+ set_job_prio(job_ptr);
slurm_make_time_str(&detail_ptr->begin_time, time_str,
sizeof(time_str));
info("sched: update_job: setting begin to %s for "
@@ -7733,12 +8043,12 @@
if (fail_reason != WAIT_NO_REASON) {
if (fail_reason == WAIT_QOS_THRES)
error_code = ESLURM_QOS_THRES;
- else if (fail_reason == WAIT_PART_TIME_LIMIT ||
- fail_reason == WAIT_PART_NODE_LIMIT)
+ else if ((fail_reason == WAIT_PART_TIME_LIMIT) ||
+ (fail_reason == WAIT_PART_NODE_LIMIT) ||
+ (fail_reason == WAIT_HELD))
error_code = SLURM_SUCCESS;
else
error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
- job_ptr->priority = 1; /* Move to end of queue */
job_ptr->state_reason = fail_reason;
xfree(job_ptr->state_desc);
return error_code;
@@ -7948,7 +8258,7 @@
* If job has a hold then do nothing */
if ((error_code == SLURM_SUCCESS) && (job_ptr->priority > 1) &&
strcmp(slurmctld_conf.priority_type, "priority/basic"))
- _set_job_prio(job_ptr);
+ set_job_prio(job_ptr);
return error_code;
}
@@ -8405,6 +8715,12 @@
if (IS_JOB_FINISHED(job_ptr))
return ESLURM_ALREADY_DONE;
+ if (job_ptr->alias_list && !strcmp(job_ptr->alias_list, "TBD") &&
+ job_ptr->node_bitmap &&
+ (bit_overlap(power_node_bitmap, job_ptr->node_bitmap) == 0)) {
+ set_job_alias_list(job_ptr);
+ }
+
*job_pptr = job_ptr;
return SLURM_SUCCESS;
}
@@ -8649,9 +8965,17 @@
if (!bit_test(job_ptr->node_bitmap, i))
continue;
node_ptr = &node_record_table_ptr[i];
+#ifndef HAVE_BG
+ /* If this is a bluegene system we do not want
+ to mark the entire midplane down if we have
+ an epilog error. This would most likely
+ kill other jobs sharing that midplane and
+ that is not what we want.
+ */
if (return_code)
set_node_down_ptr(node_ptr, "Epilog error");
else
+#endif
make_node_idle(node_ptr, job_ptr);
}
}
@@ -8727,6 +9051,14 @@
xassert(job_ptr);
+#ifdef HAVE_BG
+ /* If on a bluegene system we want to remove the job_resrcs so
+ we don't get an error message about them already existing
+ when the job goes to run again.
+ */
+ if (requeue)
+ free_job_resources(&job_ptr->job_resrcs);
+#endif
acct_policy_remove_job_submit(job_ptr);
if (!IS_JOB_RESIZING(job_ptr)) {
@@ -8760,18 +9092,19 @@
if (IS_JOB_RESIZING(job_ptr))
return;
- if(!job_ptr->assoc_id) {
+ if (!job_ptr->assoc_id) {
slurmdb_association_rec_t assoc_rec;
/* In case accounting enabled after starting the job */
memset(&assoc_rec, 0, sizeof(slurmdb_association_rec_t));
assoc_rec.acct = job_ptr->account;
- assoc_rec.partition = job_ptr->partition;
+ if (job_ptr->part_ptr)
+ assoc_rec.partition = job_ptr->part_ptr->name;
assoc_rec.uid = job_ptr->user_id;
- if(!(assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
- accounting_enforce,
- (slurmdb_association_rec_t **)
- &job_ptr->assoc_ptr))) {
+ if (!(assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
+ accounting_enforce,
+ (slurmdb_association_rec_t **)
+ &job_ptr->assoc_ptr))) {
job_ptr->assoc_id = assoc_rec.id;
/* we have to call job start again because the
* associd does not get updated in job complete */
@@ -8796,7 +9129,6 @@
struct job_details *detail_ptr = job_ptr->details;
time_t now = time(NULL);
int depend_rc;
- bool independent = false;
/* Test dependencies first so we can cancel jobs before dependent
* job records get purged (e.g. afterok, afternotok) */
@@ -8837,14 +9169,11 @@
/* Job is eligible to start now */
if (job_ptr->state_reason == WAIT_DEPENDENCY) {
job_ptr->state_reason = WAIT_NO_REASON;
- independent = true;
xfree(job_ptr->state_desc);
}
if ((detail_ptr && (detail_ptr->begin_time == 0) &&
(job_ptr->priority != 0))) {
detail_ptr->begin_time = now;
- if (independent)
- _set_job_prio(job_ptr);
} else if (job_ptr->state_reason == WAIT_TIME) {
job_ptr->state_reason = WAIT_NO_REASON;
xfree(job_ptr->state_desc);
@@ -8872,17 +9201,24 @@
/* Always call select_g_job_ready() so that select/bluegene can
* test and update block state information. */
rc = select_g_job_ready(job_ptr);
-
if (rc == READY_JOB_FATAL)
return ESLURM_INVALID_PARTITION_NAME;
if (rc == READY_JOB_ERROR)
return EAGAIN;
-
if (rc)
rc = READY_NODE_STATE;
+ if (job_ptr->details && job_ptr->details->prolog_running)
+ rc &= (~READY_NODE_STATE);
+
if (IS_JOB_RUNNING(job_ptr) || IS_JOB_SUSPENDED(job_ptr))
rc |= READY_JOB_STATE;
+ if ((rc == (READY_NODE_STATE | READY_JOB_STATE)) &&
+ job_ptr->alias_list && !strcmp(job_ptr->alias_list, "TBD") &&
+ job_ptr->node_bitmap &&
+ (bit_overlap(power_node_bitmap, job_ptr->node_bitmap) == 0)) {
+ set_job_alias_list(job_ptr);
+ }
*ready = rc;
return SLURM_SUCCESS;
@@ -8891,6 +9227,21 @@
/* Send specified signal to all steps associated with a job */
static void _signal_job(struct job_record *job_ptr, int signal)
{
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ ListIterator step_iterator;
+ struct step_record *step_ptr;
+ step_iterator = list_iterator_create(job_ptr->step_list);
+ while ((step_ptr = list_next(step_iterator))) {
+ /* Since we have already checked the uid,
+ * we can send this signal as uid 0. */
+ job_step_signal(job_ptr->job_id, step_ptr->step_id,
+ signal, 0);
+ }
+ list_iterator_destroy (step_iterator);
+
+ return;
+#else
+
#ifndef HAVE_FRONT_END
int i;
#endif
@@ -8930,6 +9281,7 @@
agent_args->msg_args = signal_job_msg;
agent_queue_request(agent_args);
return;
+#endif
}
/* Send suspend request to slumrd of all nodes associated with a job */
@@ -9381,12 +9733,12 @@
}
/*
- * job_cancel_by_assoc_id - Cancel all pending and running jobs with a given
+ * job_hold_by_assoc_id - Hold all pending jobs with a given
* association ID. This happens when an association is deleted (e.g. when
* a user is removed from the association database).
- * RET count of cancelled jobs
+ * RET count of held jobs
*/
-extern int job_cancel_by_assoc_id(uint32_t assoc_id)
+extern int job_hold_by_assoc_id(uint32_t assoc_id)
{
int cnt = 0;
ListIterator job_iterator;
@@ -9428,12 +9780,10 @@
if(IS_JOB_FINISHED(job_ptr))
continue;
- info("Association deleted, cancelling job %u",
+ info("Association deleted, holding job %u",
job_ptr->job_id);
- /* make sure the assoc_mgr_lock isn't locked before this. */
- job_signal(job_ptr->job_id, SIGKILL, 0, 0, false);
- job_ptr->state_reason = FAIL_ACCOUNT;
xfree(job_ptr->state_desc);
+ job_ptr->state_reason = FAIL_ACCOUNT;
cnt++;
}
list_iterator_destroy(job_iterator);
@@ -9442,12 +9792,12 @@
}
/*
- * job_cancel_by_qos_id - Cancel all pending and running jobs with a given
+ * job_hold_by_qos_id - Hold all pending jobs with a given
* QOS ID. This happens when a QOS is deleted (e.g. when
* a QOS is removed from the association database).
- * RET count of cancelled jobs
+ * RET count of held jobs
*/
-extern int job_cancel_by_qos_id(uint32_t qos_id)
+extern int job_hold_by_qos_id(uint32_t qos_id)
{
int cnt = 0;
ListIterator job_iterator;
@@ -9482,12 +9832,9 @@
if(IS_JOB_FINISHED(job_ptr))
continue;
- info("QOS deleted, cancelling job %u",
- job_ptr->job_id);
- /* make sure the assoc_mgr_lock isn't locked before this. */
- job_signal(job_ptr->job_id, SIGKILL, 0, 0, false);
- job_ptr->state_reason = FAIL_QOS;
+ info("QOS deleted, holding job %u", job_ptr->job_id);
xfree(job_ptr->state_desc);
+ job_ptr->state_reason = FAIL_QOS;
cnt++;
}
list_iterator_destroy(job_iterator);
@@ -9515,9 +9862,10 @@
memset(&assoc_rec, 0, sizeof(slurmdb_association_rec_t));
- assoc_rec.uid = job_ptr->user_id;
- assoc_rec.partition = job_ptr->partition;
assoc_rec.acct = new_account;
+ if (job_ptr->part_ptr)
+ assoc_rec.partition = job_ptr->part_ptr->name;
+ assoc_rec.uid = job_ptr->user_id;
if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
accounting_enforce,
(slurmdb_association_rec_t **)
@@ -9525,9 +9873,9 @@
info("%s: invalid account %s for job_id %u",
module, new_account, job_ptr->job_id);
return ESLURM_INVALID_ACCOUNT;
- } else if(association_based_accounting &&
- !job_ptr->assoc_ptr &&
- !(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)) {
+ } else if (association_based_accounting &&
+ !job_ptr->assoc_ptr &&
+ !(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)) {
/* if not enforcing associations we want to look for
* the default account and use it to avoid getting
* trash in the accounting records.
@@ -9537,7 +9885,7 @@
accounting_enforce,
(slurmdb_association_rec_t **)
&job_ptr->assoc_ptr);
- if(!job_ptr->assoc_ptr) {
+ if (!job_ptr->assoc_ptr) {
debug("%s: we didn't have an association for account "
"'%s' and user '%u', and we can't seem to find "
"a default one either. Keeping new account "
@@ -9637,43 +9985,39 @@
struct job_record *job_ptr;
slurmctld_lock_t job_write_lock = {
NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
- time_t now = time(NULL);
/* send jobs in pending or running state */
lock_slurmctld(job_write_lock);
itr = list_iterator_create(job_list);
while ((job_ptr = list_next(itr))) {
- if(!job_ptr->assoc_id) {
+ if (!job_ptr->assoc_id) {
slurmdb_association_rec_t assoc_rec;
memset(&assoc_rec, 0,
sizeof(slurmdb_association_rec_t));
- assoc_rec.uid = job_ptr->user_id;
- assoc_rec.partition = job_ptr->partition;
assoc_rec.acct = job_ptr->account;
+ if (job_ptr->part_ptr)
+ assoc_rec.partition = job_ptr->part_ptr->name;
+ assoc_rec.uid = job_ptr->user_id;
- if(assoc_mgr_fill_in_assoc(
+ if (assoc_mgr_fill_in_assoc(
acct_db_conn, &assoc_rec,
accounting_enforce,
(slurmdb_association_rec_t **)
&job_ptr->assoc_ptr) &&
- (accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)
- && (!IS_JOB_FINISHED(job_ptr))) {
- info("Cancelling job %u with "
+ (accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)
+ && (!IS_JOB_FINISHED(job_ptr))) {
+ info("Holding job %u with "
"invalid association",
job_ptr->job_id);
- job_ptr->job_state = JOB_CANCELLED;
+ xfree(job_ptr->state_desc);
job_ptr->state_reason = FAIL_ACCOUNT;
- if (IS_JOB_PENDING(job_ptr))
- job_ptr->start_time = now;
- job_ptr->end_time = now;
- job_completion_logger(job_ptr, false);
continue;
} else
job_ptr->assoc_id = assoc_rec.id;
}
/* we only want active, un accounted for jobs */
- if(job_ptr->db_index || IS_JOB_FINISHED(job_ptr))
+ if (job_ptr->db_index || IS_JOB_FINISHED(job_ptr))
continue;
debug("first reg: starting job %u in accounting",
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index f485891..10bb604 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -42,6 +42,10 @@
# include "config.h"
#endif
+#if defined(__NetBSD__)
+#include <sys/types.h> /* for pid_t */
+#include <sys/signal.h> /* for SIGKILL */
+#endif
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
@@ -90,6 +94,8 @@
static int save_last_part_update = 0;
+extern diag_stats_t slurmctld_diag_stats;
+
/*
* _build_user_job_list - build list of jobs for a given user
* and an optional job name
@@ -166,6 +172,11 @@
job_is_pending = IS_JOB_PENDING(job_ptr);
if (!job_is_pending || IS_JOB_COMPLETING(job_ptr))
continue;
+#ifdef HAVE_FRONT_END
+ /* At least one front-end node up at this point */
+ if (job_ptr->state_reason == WAIT_FRONT_END)
+ job_ptr->state_reason = WAIT_NO_REASON;
+#endif
/* ensure dependency shows current values behind a hold */
job_indepen = job_independent(job_ptr, 0);
if (job_is_pending && clear_start)
@@ -183,7 +194,7 @@
job_reason_string(job_ptr->state_reason),
job_ptr->priority);
continue;
- } else if ((job_ptr->priority == 1) && !job_indepen &&
+ } else if (!job_indepen &&
((job_ptr->state_reason == WAIT_HELD) ||
(job_ptr->state_reason == WAIT_HELD_USER))) {
/* released behind active dependency? */
@@ -221,6 +232,15 @@
"part %s", job_ptr->job_id,
job_ptr->partition);
}
+ if (!part_policy_job_runnable_state(job_ptr)) {
+ if (job_limits_check(&job_ptr) ==
+ WAIT_NO_REASON) {
+ job_ptr->state_reason = WAIT_NO_REASON;
+ xfree(job_ptr->state_desc);
+ } else {
+ continue;
+ }
+ }
_job_queue_append(job_queue, job_ptr,
job_ptr->part_ptr);
}
@@ -273,10 +293,18 @@
ListIterator job_iterator;
slurmctld_lock_t job_write_lock =
{ READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
+#ifdef HAVE_BG
+ static uint16_t cpus_per_node = 0;
+ if (!cpus_per_node)
+ select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
+ &cpus_per_node);
+#endif
lock_slurmctld(job_write_lock);
job_iterator = list_iterator_create(job_list);
while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
+ uint32_t job_min_nodes, job_max_nodes;
+ uint32_t part_min_nodes, part_max_nodes;
part_ptr = job_ptr->part_ptr;
if (!IS_JOB_PENDING(job_ptr))
continue;
@@ -289,9 +317,20 @@
if ((job_ptr->time_limit != NO_VAL) &&
(job_ptr->time_limit > part_ptr->max_time))
continue;
- if ((job_ptr->details->max_nodes != 0) &&
- ((job_ptr->details->max_nodes < part_ptr->min_nodes) ||
- (job_ptr->details->min_nodes > part_ptr->max_nodes)))
+#ifdef HAVE_BG
+ job_min_nodes = job_ptr->details->min_cpus / cpus_per_node;
+ job_max_nodes = job_ptr->details->max_cpus / cpus_per_node;
+ part_min_nodes = part_ptr->min_nodes_orig;
+ part_max_nodes = part_ptr->max_nodes_orig;
+#else
+ job_min_nodes = job_ptr->details->min_nodes;
+ job_max_nodes = job_ptr->details->max_nodes;
+ part_min_nodes = part_ptr->min_nodes;
+ part_max_nodes = part_ptr->max_nodes;
+#endif
+ if ((job_max_nodes != 0) &&
+ ((job_max_nodes < part_min_nodes) ||
+ (job_min_nodes > part_max_nodes)))
continue;
/* Job's eligible time is set in job_independent() */
if (!job_independent(job_ptr, 0))
@@ -317,6 +356,18 @@
return false;
}
+static void do_diag_stats(struct timeval tv1, struct timeval tv2)
+{
+ if (slurm_diff_tv(&tv1,&tv2) > slurmctld_diag_stats.schedule_cycle_max)
+ slurmctld_diag_stats.schedule_cycle_max = slurm_diff_tv(&tv1,
+ &tv2);
+
+ slurmctld_diag_stats.schedule_cycle_sum += slurm_diff_tv(&tv1, &tv2);
+ slurmctld_diag_stats.schedule_cycle_last = slurm_diff_tv(&tv1, &tv2);
+ slurmctld_diag_stats.schedule_cycle_counter++;
+}
+
+
/*
* schedule - attempt to schedule all pending jobs
* pending jobs for each partition will be scheduled in priority
@@ -355,14 +406,8 @@
DEF_TIMERS;
- sched_start = now;
- if (sched_timeout == 0) {
- sched_timeout = slurm_get_msg_timeout() / 2;
- sched_timeout = MAX(sched_timeout, 1);
- sched_timeout = MIN(sched_timeout, 10);
- }
-
START_TIMER;
+ sched_start = now;
if (sched_update != slurmctld_conf.last_update) {
char *sched_params, *tmp_ptr;
char *sched_type = slurm_get_sched_type();
@@ -390,6 +435,10 @@
}
}
xfree(sched_params);
+
+ sched_timeout = slurm_get_msg_timeout() / 2;
+ sched_timeout = MAX(sched_timeout, 1);
+ sched_timeout = MIN(sched_timeout, 10);
sched_update = slurmctld_conf.last_update;
}
if (job_limit == 0)
@@ -443,6 +492,7 @@
debug("sched: Running job scheduler");
job_queue = build_job_queue(false);
+ slurmctld_diag_stats.schedule_queue_len = list_count(job_queue);
while ((job_queue_rec = list_pop_bottom(job_queue, sort_job_queue2))) {
job_ptr = job_queue_rec->job_ptr;
part_ptr = job_queue_rec->part_ptr;
@@ -456,8 +506,15 @@
job_depth);
break;
}
+
+ slurmctld_diag_stats.schedule_cycle_depth++;
+
if (!IS_JOB_PENDING(job_ptr))
continue; /* started in other partition */
+
+ /* Cycle through partitions usable for this job */
+ job_ptr->part_ptr = part_ptr;
+
if (job_ptr->priority == 0) { /* held */
debug3("sched: JobId=%u. State=%s. Reason=%s. "
"Priority=%u.",
@@ -468,28 +525,57 @@
continue;
}
- /* If a patition update has occurred, then do a limit check. */
- if (save_last_part_update != last_part_update) {
- int fail_reason = job_limits_check(&job_ptr);
- if (fail_reason != WAIT_NO_REASON) {
- job_ptr->state_reason = fail_reason;
- job_ptr->priority = 1;
+ /* Test for valid account, QOS and required nodes on each pass */
+ if (job_ptr->state_reason == FAIL_ACCOUNT) {
+ slurmdb_association_rec_t assoc_rec;
+ memset(&assoc_rec, 0, sizeof(slurmdb_association_rec_t));
+ assoc_rec.acct = job_ptr->account;
+ if (job_ptr->part_ptr)
+ assoc_rec.partition = job_ptr->part_ptr->name;
+ assoc_rec.uid = job_ptr->user_id;
+
+ if (!assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
+ accounting_enforce,
+ (slurmdb_association_rec_t **)
+ &job_ptr->assoc_ptr)) {
+ job_ptr->state_reason = WAIT_NO_REASON;
+ job_ptr->assoc_id = assoc_rec.id;
+ } else {
continue;
}
- } else if ((job_ptr->state_reason == WAIT_PART_TIME_LIMIT) ||
- (job_ptr->state_reason == WAIT_PART_NODE_LIMIT)) {
- job_ptr->start_time = 0;
- job_ptr->priority = 1;
+ }
+ if (job_ptr->qos_id) {
+ slurmdb_association_rec_t *assoc_ptr;
+ assoc_ptr = (slurmdb_association_rec_t *)job_ptr->assoc_ptr;
+ if (assoc_ptr &&
+ !bit_test(assoc_ptr->usage->valid_qos,
+ job_ptr->qos_id)) {
+ info("sched: JobId=%u has invalid QOS",
+ job_ptr->job_id);
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason = FAIL_QOS;
continue;
+ } else if (job_ptr->state_reason == FAIL_QOS) {
+ xfree(job_ptr->state_desc);
+ job_ptr->state_reason = WAIT_NO_REASON;
+ }
}
- if (job_ptr->part_ptr != part_ptr) {
- /* Cycle through partitions usable for this job */
- job_ptr->part_ptr = part_ptr;
+
+ if (!acct_policy_job_runnable_state(job_ptr) &&
+ !acct_policy_job_runnable(job_ptr))
+ continue;
+
+ if ((job_ptr->state_reason == WAIT_NODE_NOT_AVAIL) &&
+ job_ptr->details && job_ptr->details->req_node_bitmap &&
+ !bit_super_set(job_ptr->details->req_node_bitmap,
+ avail_node_bitmap)) {
+ continue;
}
+
if ((job_ptr->resv_name == NULL) &&
_failed_partition(job_ptr->part_ptr, failed_parts,
failed_part_cnt)) {
- if (job_ptr->priority != 1) { /* not system hold */
+ if (job_ptr->state_reason == WAIT_NO_REASON) {
job_ptr->state_reason = WAIT_PRIORITY;
xfree(job_ptr->state_desc);
}
@@ -502,9 +588,13 @@
job_ptr->partition);
continue;
}
- if (bit_overlap(avail_node_bitmap,
- job_ptr->part_ptr->node_bitmap) == 0) {
- /* All nodes DRAIN, DOWN, or
+ i = bit_overlap(avail_node_bitmap,
+ job_ptr->part_ptr->node_bitmap);
+ if ((job_ptr->details &&
+ (job_ptr->details->min_nodes != NO_VAL) &&
+ (job_ptr->details->min_nodes > i)) ||
+ (!job_ptr->details && (i == 0))) {
+ /* Too many nodes DRAIN, DOWN, or
* reserved for jobs in higher priority partition */
job_ptr->state_reason = WAIT_RESOURCES;
debug3("sched: JobId=%u. State=%s. Reason=%s. "
@@ -538,13 +628,8 @@
info("sched: JobId=%u has invalid account",
job_ptr->job_id);
last_job_update = time(NULL);
- job_ptr->job_state = JOB_FAILED;
- job_ptr->exit_code = 1;
job_ptr->state_reason = FAIL_ACCOUNT;
xfree(job_ptr->state_desc);
- job_ptr->start_time = job_ptr->end_time = time(NULL);
- job_completion_logger(job_ptr, false);
- delete_job_details(job_ptr);
continue;
}
@@ -616,7 +701,7 @@
} else {
sprintf(tmp_char,"%s",job_ptr->nodes);
}
- info("sched: Allocate JobId=%u BPList=%s",
+ info("sched: Allocate JobId=%u MidplaneList=%s",
job_ptr->job_id, tmp_char);
xfree(ionodes);
#else
@@ -656,6 +741,9 @@
list_destroy(job_queue);
unlock_slurmctld(job_write_lock);
END_TIMER2("schedule");
+
+ do_diag_stats(tv1, tv2);
+
return job_cnt;
}
@@ -675,11 +763,21 @@
job_queue_rec_t *job_rec1 = (job_queue_rec_t *) x;
job_queue_rec_t *job_rec2 = (job_queue_rec_t *) y;
bool has_resv1, has_resv2;
+ static time_t config_update = 0;
+ static bool preemption_enabled = true;
- if (slurm_job_preempt_check(job_rec1, job_rec2))
- return -1;
- if (slurm_job_preempt_check(job_rec2, job_rec1))
- return 1;
+ /* The following block of code is designed to minimize run time in
+ * typical configurations for this frequently executed function. */
+ if (config_update != slurmctld_conf.last_update) {
+ preemption_enabled = slurm_preemption_enabled();
+ config_update = slurmctld_conf.last_update;
+ }
+ if (preemption_enabled) {
+ if (slurm_job_preempt_check(job_rec1, job_rec2))
+ return -1;
+ if (slurm_job_preempt_check(job_rec2, job_rec1))
+ return 1;
+ }
has_resv1 = (job_rec1->job_ptr->resv_id != 0);
has_resv2 = (job_rec2->job_ptr->resv_id != 0);
@@ -712,6 +810,7 @@
launch_msg_ptr->uid = job_ptr->user_id;
launch_msg_ptr->gid = job_ptr->group_id;
launch_msg_ptr->ntasks = job_ptr->details->num_tasks;
+ launch_msg_ptr->alias_list = xstrdup(job_ptr->alias_list);
launch_msg_ptr->nodes = xstrdup(job_ptr->nodes);
launch_msg_ptr->overcommit = job_ptr->details->overcommit;
launch_msg_ptr->open_mode = job_ptr->details->open_mode;
@@ -1274,7 +1373,8 @@
while ((job_q_ptr = (struct job_record *) list_next(job_iterator))) {
if (!IS_JOB_PENDING(job_q_ptr) || !job_q_ptr->details ||
(job_q_ptr->part_ptr != job_ptr->part_ptr) ||
- (job_q_ptr->priority < job_ptr->priority))
+ (job_q_ptr->priority < job_ptr->priority) ||
+ (job_q_ptr->job_id == job_ptr->job_id))
continue;
if (job_q_ptr->details->min_nodes == NO_VAL)
job_size_nodes = 1;
@@ -1374,6 +1474,8 @@
bit_and(avail_bitmap, avail_node_bitmap);
if (rc == SLURM_SUCCESS) {
+ /* On BlueGene systems don't adjust the min/max node limits
+ here. We are working on midplane values. */
min_nodes = MAX(job_ptr->details->min_nodes,
part_ptr->min_nodes);
if (job_ptr->details->max_nodes == 0)
@@ -1667,7 +1769,6 @@
READ_LOCK, READ_LOCK, WRITE_LOCK, NO_LOCK };
bitstr_t *node_bitmap = NULL;
static int last_job_requeue = 0;
-
lock_slurmctld(config_read_lock);
argv[0] = xstrdup(slurmctld_conf.prolog_slurmctld);
argv[1] = NULL;
@@ -1811,7 +1912,7 @@
i = str_ptr - tmp_requested - 1;
} else if (tmp_requested[i] == '&') {
tmp_requested[i] = '\0';
- if ((feature == NULL) || (bracket != 0)) {
+ if (feature == NULL) {
info("Job %u invalid constraint %s",
job_ptr->job_id, detail_ptr->features);
xfree(tmp_requested);
@@ -1820,7 +1921,10 @@
feat = xmalloc(sizeof(struct feature_record));
feat->name = xstrdup(feature);
feat->count = count;
- feat->op_code = FEATURE_OP_AND;
+ if (bracket)
+ feat->op_code = FEATURE_OP_XAND;
+ else
+ feat->op_code = FEATURE_OP_AND;
list_append(detail_ptr->feature_list, feat);
feature = NULL;
count = 0;
@@ -1911,7 +2015,8 @@
feat_iter = list_iterator_create(feature_list);
while ((feat_ptr = (struct feature_record *)list_next(feat_iter))) {
- if (feat_ptr->op_code == FEATURE_OP_XOR) {
+ if ((feat_ptr->op_code == FEATURE_OP_XOR) ||
+ (feat_ptr->op_code == FEATURE_OP_XAND)) {
if (bracket == 0)
xstrcat(buf, "[");
bracket = 1;
@@ -1923,11 +2028,14 @@
snprintf(tmp, sizeof(tmp), "*%u", feat_ptr->count);
xstrcat(buf, tmp);
}
- if (bracket && (feat_ptr->op_code != FEATURE_OP_XOR)) {
+ if (bracket &&
+ ((feat_ptr->op_code != FEATURE_OP_XOR) &&
+ (feat_ptr->op_code != FEATURE_OP_XAND))) {
xstrcat(buf, "]");
bracket = 0;
}
- if (feat_ptr->op_code == FEATURE_OP_AND)
+ if ((feat_ptr->op_code == FEATURE_OP_AND) ||
+ (feat_ptr->op_code == FEATURE_OP_XAND))
xstrcat(buf, "&");
else if ((feat_ptr->op_code == FEATURE_OP_OR) ||
(feat_ptr->op_code == FEATURE_OP_XOR))
diff --git a/src/slurmctld/job_submit.c b/src/slurmctld/job_submit.c
index 7ded617..2def3b5 100644
--- a/src/slurmctld/job_submit.c
+++ b/src/slurmctld/job_submit.c
@@ -93,6 +93,7 @@
static slurm_submit_context_t *submit_context = NULL;
static char *submit_plugin_list = NULL;
static pthread_mutex_t submit_context_lock = PTHREAD_MUTEX_INITIALIZER;
+static bool init_run = false;
static int _load_submit_plugin(char *plugin_name,
slurm_submit_context_t *plugin_context)
@@ -196,6 +197,9 @@
int rc = SLURM_SUCCESS;
char *last = NULL, *names, *one_name;
+ if (init_run && (submit_context_cnt >= 0))
+ return rc;
+
slurm_mutex_lock(&submit_context_lock);
if (submit_context_cnt >= 0)
goto fini;
@@ -219,6 +223,7 @@
one_name = strtok_r(NULL, ",", &last);
}
xfree(names);
+ init_run = true;
fini: slurm_mutex_unlock(&submit_context_lock);
return rc;
@@ -237,6 +242,7 @@
if (submit_context_cnt < 0)
goto fini;
+ init_run = false;
for (i=0; i<submit_context_cnt; i++) {
j = _unload_submit_plugin(submit_context + i);
if (j != SLURM_SUCCESS)
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index ae704ea..714e3cf 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -70,6 +70,7 @@
#include "src/slurmctld/locks.h"
#include "src/slurmctld/ping_nodes.h"
#include "src/slurmctld/proc_req.h"
+#include "src/slurmctld/reservation.h"
#include "src/slurmctld/sched_plugin.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/state_save.h"
@@ -81,8 +82,9 @@
#define MAX_RETRIES 10
/* Change NODE_STATE_VERSION value when changing the state save format */
-#define NODE_STATE_VERSION "VER004"
-#define NODE_2_2_STATE_VERSION "VER004" /* SLURM version 2.2 */
+#define NODE_STATE_VERSION "VER005"
+#define NODE_2_4_STATE_VERSION "VER005" /* SLURM version 2.4 */
+#define NODE_2_2_STATE_VERSION "VER004" /* SLURM version 2.2 & 2.3 */
#define NODE_2_1_STATE_VERSION "VER003" /* SLURM version 2.1 */
/* Global variables */
@@ -92,6 +94,14 @@
bitstr_t *power_node_bitmap = NULL; /* bitmap of powered down nodes */
bitstr_t *share_node_bitmap = NULL; /* bitmap of sharable nodes */
bitstr_t *up_node_bitmap = NULL; /* bitmap of non-down nodes */
+bool load_2_4_state = false; /* There was a bug in 2.4.0
+ * where the job state version
+ * wasn't incremented
				 * correctly. Luckily the node
+ * state was. We will use it
+ * to set the version
+ * correctly in the job.
+ */
static void _dump_node_state (struct node_record *dump_node_ptr,
Buf buffer);
@@ -204,7 +214,9 @@
static void
_dump_node_state (struct node_record *dump_node_ptr, Buf buffer)
{
+ packstr (dump_node_ptr->comm_name, buffer);
packstr (dump_node_ptr->name, buffer);
+ packstr (dump_node_ptr->node_hostname, buffer);
packstr (dump_node_ptr->reason, buffer);
packstr (dump_node_ptr->features, buffer);
packstr (dump_node_ptr->gres, buffer);
@@ -262,6 +274,7 @@
*/
extern int load_all_node_state ( bool state_only )
{
+ char *comm_name = NULL, *node_hostname = NULL;
char *node_name = NULL, *reason = NULL, *data = NULL, *state_file;
char *features = NULL, *gres = NULL;
int data_allocated, data_read = 0, error_code = 0, node_cnt = 0;
@@ -318,10 +331,12 @@
safe_unpackstr_xmalloc( &ver_str, &name_len, buffer);
debug3("Version string in node_state header is %s", ver_str);
- if(ver_str) {
- if(!strcmp(ver_str, NODE_STATE_VERSION)) {
+ if (ver_str) {
+ if (!strcmp(ver_str, NODE_STATE_VERSION)) {
protocol_version = SLURM_PROTOCOL_VERSION;
- } else if(!strcmp(ver_str, NODE_2_1_STATE_VERSION)) {
+ } else if (!strcmp(ver_str, NODE_2_2_STATE_VERSION)) {
+ protocol_version = SLURM_2_2_PROTOCOL_VERSION;
+ } else if (!strcmp(ver_str, NODE_2_1_STATE_VERSION)) {
protocol_version = SLURM_2_1_PROTOCOL_VERSION;
}
}
@@ -336,11 +351,36 @@
}
xfree(ver_str);
+ if (protocol_version == SLURM_2_4_PROTOCOL_VERSION)
+ load_2_4_state = true;
+
safe_unpack_time (&time_stamp, buffer);
while (remaining_buf (buffer) > 0) {
uint16_t base_state;
- if(protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ safe_unpackstr_xmalloc (&comm_name, &name_len, buffer);
+ safe_unpackstr_xmalloc (&node_name, &name_len, buffer);
+ safe_unpackstr_xmalloc (&node_hostname,
+ &name_len, buffer);
+ safe_unpackstr_xmalloc (&reason, &name_len, buffer);
+ safe_unpackstr_xmalloc (&features, &name_len, buffer);
+ safe_unpackstr_xmalloc (&gres, &name_len, buffer);
+ safe_unpack16 (&node_state, buffer);
+ safe_unpack16 (&cpus, buffer);
+ safe_unpack16 (&sockets, buffer);
+ safe_unpack16 (&cores, buffer);
+ safe_unpack16 (&threads, buffer);
+ safe_unpack32 (&real_memory, buffer);
+ safe_unpack32 (&tmp_disk, buffer);
+ safe_unpack32 (&reason_uid, buffer);
+ safe_unpack_time (&reason_time, buffer);
+ if (gres_plugin_node_state_unpack(
+ &gres_list, buffer, node_name,
+ protocol_version) != SLURM_SUCCESS)
+ goto unpack_error;
+ base_state = node_state & NODE_STATE_BASE;
+ } else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
safe_unpackstr_xmalloc (&node_name, &name_len, buffer);
safe_unpackstr_xmalloc (&reason, &name_len, buffer);
safe_unpackstr_xmalloc (&features, &name_len, buffer);
@@ -359,7 +399,7 @@
protocol_version) != SLURM_SUCCESS)
goto unpack_error;
base_state = node_state & NODE_STATE_BASE;
- } else if(protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
+ } else if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) {
safe_unpackstr_xmalloc (&node_name, &name_len, buffer);
safe_unpackstr_xmalloc (&reason, &name_len, buffer);
safe_unpackstr_xmalloc (&features, &name_len, buffer);
@@ -400,7 +440,31 @@
} else if (state_only) {
uint16_t orig_flags;
node_cnt++;
- if (IS_NODE_UNKNOWN(node_ptr)) {
+ if (IS_NODE_CLOUD(node_ptr)) {
+ if ((!power_save_mode) &&
+ ((node_state & NODE_STATE_POWER_SAVE) ||
+ (node_state & NODE_STATE_POWER_UP))) {
+ node_state &= (~NODE_STATE_POWER_SAVE);
+ node_state &= (~NODE_STATE_POWER_UP);
+ if (hs)
+ hostset_insert(hs, node_name);
+ else
+ hs = hostset_create(node_name);
+ }
+ if (comm_name && node_hostname) {
+ /* Recover NodeAddr and NodeHostName */
+ xfree(node_ptr->comm_name);
+ node_ptr->comm_name = comm_name;
+ comm_name = NULL; /* Nothing to free */
+ xfree(node_ptr->node_hostname);
+ node_ptr->node_hostname = node_hostname;
+ node_hostname = NULL; /* Nothing to free */
+ slurm_reset_alias(node_ptr->name,
+ node_ptr->comm_name,
+ node_ptr->node_hostname);
+ }
+ node_ptr->node_state = node_state;
+ } else if (IS_NODE_UNKNOWN(node_ptr)) {
if (base_state == NODE_STATE_DOWN) {
orig_flags = node_ptr->node_state &
NODE_STATE_FLAGS;
@@ -469,6 +533,19 @@
else
hs = hostset_create(node_name);
}
+ if (IS_NODE_CLOUD(node_ptr) &&
+ comm_name && node_hostname) {
+ /* Recover NodeAddr and NodeHostName */
+ xfree(node_ptr->comm_name);
+ node_ptr->comm_name = comm_name;
+ comm_name = NULL; /* Nothing to free */
+ xfree(node_ptr->node_hostname);
+ node_ptr->node_hostname = node_hostname;
+ node_hostname = NULL; /* Nothing to free */
+ slurm_reset_alias(node_ptr->name,
+ node_ptr->comm_name,
+ node_ptr->node_hostname);
+ }
node_ptr->node_state = node_state;
xfree(node_ptr->reason);
node_ptr->reason = reason;
@@ -492,11 +569,12 @@
node_ptr->real_memory = real_memory;
node_ptr->tmp_disk = tmp_disk;
node_ptr->last_response = (time_t) 0;
- node_ptr->last_idle = now;
}
- if (node_ptr)
+ if (node_ptr) {
+ node_ptr->last_idle = now;
select_g_update_node_state(node_ptr);
+ }
xfree(features);
xfree(gres);
@@ -504,6 +582,8 @@
list_destroy(gres_list);
gres_list = NULL;
}
+ xfree (comm_name);
+ xfree (node_hostname);
xfree (node_name);
xfree(reason);
}
@@ -615,7 +695,11 @@
if (((show_flags & SHOW_ALL) == 0) && (uid != 0) &&
(_node_is_hidden(node_ptr)))
hidden = true;
- else if (IS_NODE_FUTURE(node_ptr))
+ else if (IS_NODE_FUTURE(node_ptr) &&
+ !IS_NODE_MAINT(node_ptr)) /* reboot req sent */
+ hidden = true;
+ else if (IS_NODE_CLOUD(node_ptr) &&
+ IS_NODE_POWER_SAVE(node_ptr))
hidden = true;
else if ((node_ptr->name == NULL) ||
(node_ptr->name[0] == '\0'))
@@ -664,6 +748,10 @@
packstr (dump_node_ptr->node_hostname, buffer);
packstr (dump_node_ptr->comm_name, buffer);
pack16 (dump_node_ptr->node_state, buffer);
+ /* On a bluegene system always use the regular node
+	 * information not what is in the config_ptr.
+ */
+#ifndef HAVE_BG
if (slurmctld_conf.fast_schedule) {
/* Only data from config_record used for scheduling */
pack16(dump_node_ptr->config_ptr->cpus, buffer);
@@ -673,6 +761,7 @@
pack32(dump_node_ptr->config_ptr->real_memory, buffer);
pack32(dump_node_ptr->config_ptr->tmp_disk, buffer);
} else {
+#endif
/* Individual node data used for scheduling */
pack16(dump_node_ptr->cpus, buffer);
pack16(dump_node_ptr->sockets, buffer);
@@ -680,7 +769,9 @@
pack16(dump_node_ptr->threads, buffer);
pack32(dump_node_ptr->real_memory, buffer);
pack32(dump_node_ptr->tmp_disk, buffer);
+#ifndef HAVE_BG
}
+#endif
pack32(dump_node_ptr->config_ptr->weight, buffer);
pack32(dump_node_ptr->reason_uid, buffer);
@@ -792,6 +883,8 @@
continue;
if (IS_NODE_FUTURE(node_ptr))
continue;
+ if (IS_NODE_CLOUD(node_ptr) && IS_NODE_POWER_SAVE(node_ptr))
+ continue;
if (node_ptr->port == 0)
node_ptr->port = slurmctld_conf.slurmd_port;
slurm_set_addr (&node_ptr->slurm_addr,
@@ -815,28 +908,64 @@
*/
int update_node ( update_node_msg_t * update_node_msg )
{
- int error_code = 0, node_inx;
+ int error_code = 0, node_cnt, node_inx;
struct node_record *node_ptr = NULL;
char *this_node_name = NULL;
- hostlist_t host_list;
- uint16_t base_state = 0, state_val;
+ hostlist_t host_list, hostaddr_list = NULL, hostname_list = NULL;
+ uint16_t base_state = 0, node_flags, state_val;
time_t now = time(NULL);
- if (update_node_msg -> node_names == NULL ) {
- error ("update_node: invalid node name %s",
+ if (update_node_msg->node_names == NULL ) {
+ info("update_node: invalid node name %s",
update_node_msg -> node_names );
return ESLURM_INVALID_NODE_NAME;
}
- if ( (host_list = hostlist_create (update_node_msg -> node_names))
- == NULL) {
- error ("hostlist_create error on %s: %m",
- update_node_msg -> node_names);
+ host_list = hostlist_create(update_node_msg->node_names);
+ if (host_list == NULL) {
+ info("update_node: hostlist_create error on %s: %m",
+ update_node_msg->node_names);
return ESLURM_INVALID_NODE_NAME;
}
+ node_cnt = hostlist_count(host_list);
+
+ if (update_node_msg->node_addr) {
+ hostaddr_list = hostlist_create(update_node_msg->node_addr);
+ if (hostaddr_list == NULL) {
+ info("update_node: hostlist_create error on %s: %m",
+ update_node_msg->node_addr);
+ FREE_NULL_HOSTLIST(host_list);
+ return ESLURM_INVALID_NODE_NAME;
+ }
+ if (node_cnt != hostlist_count(hostaddr_list)) {
+ info("update_node: nodecount mismatch");
+ FREE_NULL_HOSTLIST(host_list);
+ FREE_NULL_HOSTLIST(hostaddr_list);
+ return ESLURM_INVALID_NODE_NAME;
+ }
+ }
+
+ if (update_node_msg->node_hostname) {
+ hostname_list = hostlist_create(update_node_msg->node_hostname);
+ if (hostname_list == NULL) {
+ info("update_node: hostlist_create error on %s: %m",
+ update_node_msg->node_hostname);
+ FREE_NULL_HOSTLIST(host_list);
+ FREE_NULL_HOSTLIST(hostaddr_list);
+ return ESLURM_INVALID_NODE_NAME;
+ }
+ if (node_cnt != hostlist_count(hostname_list)) {
+ info("update_node: nodecount mismatch");
+ FREE_NULL_HOSTLIST(host_list);
+ FREE_NULL_HOSTLIST(hostaddr_list);
+ FREE_NULL_HOSTLIST(hostname_list);
+ return ESLURM_INVALID_NODE_NAME;
+ }
+ }
while ( (this_node_name = hostlist_shift (host_list)) ) {
int err_code = 0;
+
state_val = update_node_msg->node_state;
node_ptr = find_node_record (this_node_name);
node_inx = node_ptr - node_record_table_ptr;
@@ -848,6 +977,24 @@
break;
}
+ if (hostaddr_list) {
+ char *this_addr = hostlist_shift(hostaddr_list);
+ xfree(node_ptr->comm_name);
+ node_ptr->comm_name = xstrdup(this_addr);
+ free(this_addr);
+ }
+ if (hostname_list) {
+ char *this_hostname = hostlist_shift(hostname_list);
+ xfree(node_ptr->node_hostname);
+ node_ptr->node_hostname = xstrdup(this_hostname);
+ free(this_hostname);
+ }
+ if (hostaddr_list || hostname_list) {
+ /* This updates the lookup table addresses */
+ slurm_reset_alias(node_ptr->name, node_ptr->comm_name,
+ node_ptr->node_hostname);
+ }
+
if (update_node_msg->features) {
xfree(node_ptr->features);
if (update_node_msg->features[0])
@@ -886,7 +1033,9 @@
}
base_state &= NODE_STATE_BASE;
}
+
if (state_val != (uint16_t) NO_VAL) {
+ node_flags = node_ptr->node_state & NODE_STATE_FLAGS;
if (state_val == NODE_RESUME) {
if (IS_NODE_IDLE(node_ptr) &&
(IS_NODE_DRAIN(node_ptr) ||
@@ -900,8 +1049,10 @@
node_ptr->node_state &= (~NODE_STATE_FAIL);
if (IS_NODE_DOWN(node_ptr)) {
state_val = NODE_STATE_IDLE;
+#ifndef HAVE_FRONT_END
node_ptr->node_state |=
NODE_STATE_NO_RESPOND;
+#endif
node_ptr->last_response = now;
ping_nodes_now = true;
} else if (IS_NODE_FUTURE(node_ptr)) {
@@ -914,8 +1065,10 @@
node_ptr->comm_name);
if (node_ptr->slurm_addr.sin_port) {
state_val = NODE_STATE_IDLE;
+#ifndef HAVE_FRONT_END
node_ptr->node_state |=
NODE_STATE_NO_RESPOND;
+#endif
node_ptr->last_response = now;
ping_nodes_now = true;
} else {
@@ -927,11 +1080,16 @@
} else
state_val = base_state;
}
- if (state_val == NODE_STATE_DOWN) {
+ if ((state_val == NODE_STATE_DOWN) ||
+ (state_val == NODE_STATE_FUTURE)) {
/* We must set node DOWN before killing
* its jobs */
_make_node_down(node_ptr, now);
kill_running_job_by_node_name (this_node_name);
+ if (state_val == NODE_STATE_FUTURE) {
+ node_ptr->node_state = NODE_STATE_FUTURE
+ | node_flags;
+ }
} else if (state_val == NODE_STATE_IDLE) {
/* assume they want to clear DRAIN and
* FAIL flags too */
@@ -956,7 +1114,6 @@
bit_set (idle_node_bitmap, node_inx);
bit_set (up_node_bitmap, node_inx);
node_ptr->last_idle = now;
- reset_job_priority();
} else if (state_val == NODE_STATE_ALLOCATED) {
if (!IS_NODE_DRAIN(node_ptr) &&
!IS_NODE_FAIL(node_ptr) &&
@@ -1031,7 +1188,9 @@
free (this_node_name);
}
- hostlist_destroy (host_list);
+ FREE_NULL_HOSTLIST(host_list);
+ FREE_NULL_HOSTLIST(hostaddr_list);
+ FREE_NULL_HOSTLIST(hostname_list);
last_node_update = now;
if ((error_code == 0) && (update_node_msg->features)) {
@@ -1483,6 +1642,12 @@
return true;
break;
+ case NODE_STATE_FUTURE:
+ if ((base_state == NODE_STATE_DOWN) ||
+ (base_state == NODE_STATE_IDLE))
+ return true;
+ break;
+
case NODE_STATE_IDLE:
if ((base_state == NODE_STATE_DOWN) ||
(base_state == NODE_STATE_IDLE))
@@ -1650,7 +1815,6 @@
reg_msg->os = NULL; /* Nothing left to free */
if (IS_NODE_NO_RESPOND(node_ptr)) {
- reset_job_priority();
node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
node_ptr->node_state &= (~NODE_STATE_POWER_UP);
last_node_update = time (NULL);
@@ -1671,11 +1835,14 @@
last_node_update = time (NULL);
}
} else {
- if (IS_NODE_UNKNOWN(node_ptr)) {
- reset_job_priority();
+ if (IS_NODE_UNKNOWN(node_ptr) || IS_NODE_FUTURE(node_ptr)) {
debug("validate_node_specs: node %s registered with "
"%u jobs",
reg_msg->node_name,reg_msg->job_count);
+ if (IS_NODE_FUTURE(node_ptr) &&
+ IS_NODE_MAINT(node_ptr) &&
+ !is_node_in_maint_reservation(node_inx))
+ node_flags &= (~NODE_STATE_MAINT);
if (reg_msg->job_count) {
node_ptr->node_state = NODE_STATE_ALLOCATED |
node_flags;
@@ -1709,7 +1876,6 @@
}
info("node %s returned to service",
reg_msg->node_name);
- reset_job_priority();
trigger_node_up(node_ptr);
last_node_update = now;
if (!IS_NODE_DRAIN(node_ptr)
@@ -1755,12 +1921,12 @@
"with %u running jobs",
node_ptr->name, reg_msg->job_count);
}
- /*
+ /*
* there must be completing job(s) on this node since
* reg_msg->job_count was set (run_job_cnt +
* comp_job_cnt) in validate_jobs_on_node()
*/
- if (node_ptr->comp_job_cnt != 0) {
+ if (node_ptr->comp_job_cnt != 0) {
node_ptr->node_state |= NODE_STATE_COMPLETING;
bit_set(cg_node_bitmap, node_inx);
}
@@ -1811,8 +1977,8 @@
state_flags = front_end_ptr->node_state & JOB_STATE_FLAGS;
if ((state_base == NODE_STATE_DOWN) &&
(!strncmp(front_end_ptr->reason, "Not responding", 14))) {
- info("FrontEnd node %s returned to service",
- reg_msg->node_name);
+ error("front end node %s returned to service",
+ reg_msg->node_name);
state_base = NODE_STATE_IDLE;
xfree(front_end_ptr->reason);
front_end_ptr->reason_time = (time_t) 0;
@@ -1929,18 +2095,12 @@
error("Registered job %u.%u in state %s on %s",
reg_msg->job_id[i], reg_msg->step_id[i],
job_state_string(job_ptr->job_state),
- front_end_ptr->name);
+ front_end_ptr->name);
kill_job_on_node(reg_msg->job_id[i], job_ptr,
node_ptr);
}
}
- if (reg_msg->job_count == 0) {
- front_end_ptr->job_cnt_comp = 0;
- front_end_ptr->node_state &= (~NODE_STATE_COMPLETING);
- } else if (front_end_ptr->job_cnt_comp != 0)
- front_end_ptr->node_state |= NODE_STATE_COMPLETING;
-
/* purge orphan batch jobs */
job_iterator = list_iterator_create(job_list);
while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
@@ -2094,10 +2254,8 @@
hostlist_destroy(reg_hostlist);
}
- if (update_node_state) {
- reset_job_priority();
+ if (update_node_state)
last_node_update = time (NULL);
- }
return error_code;
}
@@ -2169,9 +2327,10 @@
node_ptr->last_response = now;
if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_POWER_UP(node_ptr)) {
info("Node %s now responding", node_ptr->name);
- reset_job_priority();
node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
node_ptr->node_state &= (~NODE_STATE_POWER_UP);
+ if (!is_node_in_maint_reservation(node_inx))
+ node_ptr->node_state &= (~NODE_STATE_MAINT);
last_node_update = now;
}
node_flags = node_ptr->node_state & NODE_STATE_FLAGS;
@@ -2464,6 +2623,8 @@
for (i = 0; i < node_record_count; i++, node_ptr++) {
if (IS_NODE_FUTURE(node_ptr))
continue;
+ if (IS_NODE_CLOUD(node_ptr) && IS_NODE_POWER_SAVE(node_ptr))
+ continue;
hostlist_push(kill_agent_args->hostlist, node_ptr->name);
kill_agent_args->node_count++;
}
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index ef583b6..4171626 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -47,6 +47,10 @@
# include <sys/syslog.h>
#endif
+#if defined(__NetBSD__)
+#include <sys/types.h> /* for pid_t */
+#include <sys/signal.h> /* for SIGKILL */
+#endif
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
@@ -101,7 +105,6 @@
int *node_set_size);
static void _filter_nodes_in_set(struct node_set *node_set_ptr,
struct job_details *detail_ptr);
-static int _list_find_feature(void *feature_entry, void *key);
static int _match_feature(char *seek, struct node_set *node_set_ptr);
static int _nodes_in_sets(bitstr_t *req_bitmap,
struct node_set * node_set_ptr,
@@ -113,7 +116,7 @@
uint32_t min_nodes, uint32_t max_nodes,
uint32_t req_nodes, bool test_only,
List preemptee_candidates,
- List *preemptee_job_list);
+ List *preemptee_job_list, bool has_xand);
static bool _valid_feature_counts(struct job_details *detail_ptr,
bitstr_t *node_bitmap, bool *has_xor);
static bitstr_t *_valid_features(struct job_details *detail_ptr,
@@ -130,28 +133,63 @@
{
int i;
struct node_record *node_ptr;
+ bool has_cloud = false, has_cloud_power_save = false;
- xfree(job_ptr->batch_host);
-#ifdef HAVE_FRONT_END
- job_ptr->front_end_ptr = assign_front_end();
- xassert(job_ptr->front_end_ptr);
- job_ptr->batch_host = xstrdup(job_ptr->front_end_ptr->name);
-#endif
-
- for (i = 0, node_ptr = node_record_table_ptr;
- i < node_record_count; i++, node_ptr++) {
+ for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
+ i++, node_ptr++) {
if (!bit_test(job_ptr->node_bitmap, i))
continue;
- make_node_alloc(node_ptr, job_ptr);
- if (!job_ptr->batch_host)
- job_ptr->batch_host = xstrdup(node_ptr->name);
- }
- last_node_update = time(NULL);
+ if (IS_NODE_CLOUD(node_ptr)) {
+ has_cloud = true;
+ if (IS_NODE_POWER_SAVE(node_ptr))
+ has_cloud_power_save = true;
+ }
+ make_node_alloc(node_ptr, job_ptr);
+ }
+
+ last_node_update = time(NULL);
license_job_get(job_ptr);
+
+ if (has_cloud) {
+ if (has_cloud_power_save) {
+ job_ptr->alias_list = xstrdup("TBD");
+ job_ptr->wait_all_nodes = 1;
+ } else
+ set_job_alias_list(job_ptr);
+ }
+
return;
}
+/* Set a job's alias_list string */
+extern void set_job_alias_list(struct job_record *job_ptr)
+{
+ int i;
+ struct node_record *node_ptr;
+
+ xfree(job_ptr->alias_list);
+ for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
+ i++, node_ptr++) {
+ if (!bit_test(job_ptr->node_bitmap, i))
+ continue;
+
+ if (IS_NODE_CLOUD(node_ptr)) {
+ if (IS_NODE_POWER_SAVE(node_ptr)) {
+ xfree(job_ptr->alias_list);
+ job_ptr->alias_list = xstrdup("TBD");
+ break;
+ }
+ if (job_ptr->alias_list)
+ xstrcat(job_ptr->alias_list, ",");
+ xstrcat(job_ptr->alias_list, node_ptr->name);
+ xstrcat(job_ptr->alias_list, ":");
+ xstrcat(job_ptr->alias_list, node_ptr->comm_name);
+ xstrcat(job_ptr->alias_list, ":");
+ xstrcat(job_ptr->alias_list, node_ptr->node_hostname);
+ }
+ }
+}
/*
* deallocate_nodes - for a given job, deallocate its nodes and make
@@ -234,7 +272,7 @@
job_ptr->cpu_cnt = 0;
job_ptr->node_cnt = 0;
} else {
- front_end_ptr->job_cnt_comp++;
+ bool set_fe_comp = false;
if (front_end_ptr->job_cnt_run)
front_end_ptr->job_cnt_run--;
else {
@@ -245,7 +283,6 @@
uint16_t state_flags;
state_flags = front_end_ptr->node_state &
NODE_STATE_FLAGS;
- state_flags |= NODE_STATE_COMPLETING;
front_end_ptr->node_state = NODE_STATE_IDLE |
state_flags;
}
@@ -254,6 +291,12 @@
if (!bit_test(job_ptr->node_bitmap, i))
continue;
make_node_comp(node_ptr, job_ptr, suspended);
+ set_fe_comp = true;
+ }
+ if (set_fe_comp) {
+ front_end_ptr->job_cnt_comp++;
+ front_end_ptr->node_state |=
+ NODE_STATE_COMPLETING;
}
}
@@ -321,7 +364,7 @@
if (seek == NULL)
return 1; /* nothing to look for */
- feat_ptr = list_find_first(feature_list, _list_find_feature,
+ feat_ptr = list_find_first(feature_list, list_find_feature,
(void *) seek);
if (feat_ptr == NULL)
return 0; /* no such feature */
@@ -430,6 +473,7 @@
bitstr_t *feature_bitmap, *accumulate_bitmap = NULL;
bitstr_t *save_avail_node_bitmap = NULL, *resv_bitmap = NULL;
List preemptee_candidates = NULL;
+ bool has_xand = false;
/* Mark nodes reserved for other jobs as off limit for this job.
* If the job has a reservation, we've already limited the contents
@@ -524,7 +568,7 @@
job_ptr, part_ptr, min_nodes,
max_nodes, req_nodes, test_only,
preemptee_candidates,
- preemptee_job_list);
+ preemptee_job_list, false);
#if 0
{
char *tmp_str = bitmap2node_name(feature_bitmap);
@@ -545,7 +589,13 @@
if (error_code != SLURM_SUCCESS)
break;
if (feature_bitmap) {
- if (job_ptr->details->req_node_bitmap) {
+ if (feat_ptr->op_code == FEATURE_OP_XAND)
+ has_xand = true;
+ if (has_xand) {
+ /* Don't make it required since we
+ * check value on each call to
+ * _pick_best_nodes() */
+ } else if (job_ptr->details->req_node_bitmap) {
bit_or(job_ptr->details->
req_node_bitmap,
feature_bitmap);
@@ -609,12 +659,13 @@
error_code = _pick_best_nodes(node_set_ptr, node_set_size,
select_bitmap, job_ptr, part_ptr, min_nodes,
max_nodes, req_nodes, test_only,
- preemptee_candidates, preemptee_job_list);
+ preemptee_candidates, preemptee_job_list,
+ has_xand);
}
#if 0
{
char *tmp_str = bitmap2node_name(*select_bitmap);
- info("job %u allocated nodes %s err:%u",
+ info("job %u allocated nodes:%s err:%u",
job_ptr->job_id, tmp_str, error_code);
xfree(tmp_str);
}
@@ -651,6 +702,8 @@
* IN test_only - do not actually allocate resources
* IN/OUT preemptee_job_list - list of pointers to jobs to be preempted
* NULL on first entry
+ * IN has_xand - set of the constraint list includes XAND operators *and*
+ * we have already satisfied them all
* RET SLURM_SUCCESS on success,
* ESLURM_NODES_BUSY if request can not be satisfied now,
* ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE if request can never
@@ -680,7 +733,7 @@
struct part_record *part_ptr,
uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes,
bool test_only, List preemptee_candidates,
- List *preemptee_job_list)
+ List *preemptee_job_list, bool has_xand)
{
int error_code = SLURM_SUCCESS, i, j, pick_code;
int total_nodes = 0, avail_nodes = 0;
@@ -693,6 +746,7 @@
bool tried_sched = false; /* Tried to schedule with avail nodes */
static uint32_t cr_enabled = NO_VAL;
bool preempt_flag = false;
+ bool nodes_busy = false;
int shared = 0, select_mode;
if (test_only)
@@ -700,7 +754,26 @@
else
select_mode = SELECT_MODE_RUN_NOW;
- if (node_set_size == 0) {
+ if ((job_ptr->details->min_nodes == 0) &&
+ (job_ptr->details->max_nodes == 0)) {
+ avail_bitmap = bit_alloc(node_record_count);
+ if (!avail_bitmap)
+ fatal("bit_alloc: malloc failure");
+ pick_code = select_g_job_test(job_ptr,
+ avail_bitmap,
+ 0, 0, 0,
+ select_mode,
+ preemptee_candidates,
+ preemptee_job_list);
+
+ if (pick_code == SLURM_SUCCESS) {
+ *select_bitmap = avail_bitmap;
+ return SLURM_SUCCESS;
+ } else {
+ bit_free(avail_bitmap);
+ return ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
+ }
+ } else if (node_set_size == 0) {
info("_pick_best_nodes: empty node set for selection");
return ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
}
@@ -752,11 +825,13 @@
share_node_bitmap)) {
return ESLURM_NODES_BUSY;
}
+#ifndef HAVE_BG
if (bit_overlap(job_ptr->details->
req_node_bitmap,
cg_node_bitmap)) {
return ESLURM_NODES_BUSY;
}
+#endif
} else {
if (!bit_super_set(job_ptr->details->
req_node_bitmap,
@@ -765,9 +840,11 @@
}
/* Note: IDLE nodes are not COMPLETING */
}
+#ifndef HAVE_BG
} else if (bit_overlap(job_ptr->details->req_node_bitmap,
cg_node_bitmap)) {
return ESLURM_NODES_BUSY;
+#endif
}
/* still must go through select_g_job_test() to
@@ -812,16 +889,19 @@
if (!bit_super_set(job_ptr->details->req_node_bitmap,
avail_bitmap))
missing_required_nodes = true;
- FREE_NULL_BITMAP(avail_bitmap);
+
if (missing_required_nodes)
continue;
+ FREE_NULL_BITMAP(avail_bitmap);
avail_bitmap = bit_copy(job_ptr->details->
req_node_bitmap);
if (avail_bitmap == NULL)
fatal("bit_copy malloc failure");
}
for (i = 0; i < node_set_size; i++) {
- if (!bit_test(node_set_ptr[i].feature_bits, j))
+ int count1 = 0, count2 = 0;
+ if (!has_xand &&
+ !bit_test(node_set_ptr[i].feature_bits, j))
continue;
if (total_bitmap) {
@@ -835,24 +915,38 @@
}
bit_and(node_set_ptr[i].my_bitmap, avail_node_bitmap);
+ if (!nodes_busy) {
+ count1 = bit_set_count(node_set_ptr[i].
+ my_bitmap);
+ }
if (!preempt_flag) {
if (shared) {
bit_and(node_set_ptr[i].my_bitmap,
share_node_bitmap);
+#ifndef HAVE_BG
bit_not(cg_node_bitmap);
bit_and(node_set_ptr[i].my_bitmap,
cg_node_bitmap);
bit_not(cg_node_bitmap);
+#endif
} else {
bit_and(node_set_ptr[i].my_bitmap,
idle_node_bitmap);
/* IDLE nodes are not COMPLETING */
}
} else {
+#ifndef HAVE_BG
bit_not(cg_node_bitmap);
bit_and(node_set_ptr[i].my_bitmap,
cg_node_bitmap);
bit_not(cg_node_bitmap);
+#endif
+ }
+ if (!nodes_busy) {
+ count2 = bit_set_count(node_set_ptr[i].
+ my_bitmap);
+ if (count1 != count2)
+ nodes_busy = true;
}
if (avail_bitmap) {
bit_or(avail_bitmap,
@@ -887,6 +981,8 @@
list_destroy(*preemptee_job_list);
*preemptee_job_list = NULL;
}
+ if (job_ptr->details->req_node_bitmap == NULL)
+ bit_and(avail_bitmap, avail_node_bitmap);
pick_code = select_g_job_test(job_ptr,
avail_bitmap,
min_nodes,
@@ -961,7 +1057,8 @@
((job_ptr->details->req_node_bitmap == NULL) ||
(bit_super_set(job_ptr->details->req_node_bitmap,
total_bitmap)))) {
- if (!runable_avail) {
+ avail_nodes = bit_set_count(avail_bitmap);
+ if (!runable_avail && (avail_nodes >= min_nodes)) {
FREE_NULL_BITMAP(avail_bitmap);
avail_bitmap = bit_copy(total_bitmap);
if (avail_bitmap == NULL)
@@ -1008,12 +1105,12 @@
/* The job is not able to start right now, return a
* value indicating when the job can start */
- if (!runable_avail)
- error_code = ESLURM_NODE_NOT_AVAIL;
if (!runable_ever) {
error_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
info("_pick_best_nodes: job %u never runnable",
job_ptr->job_id);
+ } else if (!runable_avail && !nodes_busy) {
+ error_code = ESLURM_NODE_NOT_AVAIL;
}
if (error_code == SLURM_SUCCESS) {
@@ -1083,6 +1180,11 @@
}
if (rc != SLURM_SUCCESS) {
+ if ((mode != PREEMPT_MODE_CANCEL)
+ && (slurm_job_check_grace(job_ptr)
+ == SLURM_SUCCESS))
+ continue;
+
rc = job_signal(job_ptr->job_id, SIGKILL, 0, 0, true);
if (rc == SLURM_SUCCESS)
info("preempted job %u had to be killed",
@@ -1131,7 +1233,6 @@
time_t now = time(NULL);
bool configuring = false;
List preemptee_job_list = NULL;
- slurmdb_association_rec_t *assoc_ptr = NULL;
slurmdb_qos_rec_t *qos_ptr = NULL;
xassert(job_ptr);
@@ -1141,7 +1242,6 @@
return ESLURM_ACCOUNTING_POLICY;
part_ptr = job_ptr->part_ptr;
- assoc_ptr = (slurmdb_association_rec_t *)job_ptr->assoc_ptr;
qos_ptr = (slurmdb_qos_rec_t *)job_ptr->qos_ptr;
/* identify partition */
@@ -1153,59 +1253,20 @@
job_ptr->job_id, job_ptr->partition);
}
- /* Confirm that partition is up and has compatible nodes limits */
- fail_reason = WAIT_NO_REASON;
- if (part_ptr->state_up == PARTITION_DOWN)
- fail_reason = WAIT_PART_DOWN;
- else if (part_ptr->state_up == PARTITION_INACTIVE)
- fail_reason = WAIT_PART_INACTIVE;
- else if (job_ptr->priority == 0) /* user or administrator hold */
- fail_reason = WAIT_HELD;
- else if ((job_ptr->time_limit != NO_VAL) &&
- ((job_ptr->time_limit > part_ptr->max_time) &&
- (!qos_ptr || (qos_ptr && !(qos_ptr->flags
- & QOS_FLAG_PART_TIME_LIMIT)))))
- fail_reason = WAIT_PART_TIME_LIMIT;
- else if (((job_ptr->details->max_nodes != 0) &&
- ((job_ptr->details->max_nodes < part_ptr->min_nodes) &&
- (!qos_ptr || (qos_ptr && !(qos_ptr->flags
- & QOS_FLAG_PART_MIN_NODE))))) ||
- ((job_ptr->details->min_nodes > part_ptr->max_nodes) &&
- (!qos_ptr || (qos_ptr && !(qos_ptr->flags
- & QOS_FLAG_PART_MAX_NODE)))))
- fail_reason = WAIT_PART_NODE_LIMIT;
- else if (qos_ptr && assoc_ptr &&
- (qos_ptr->flags & QOS_FLAG_ENFORCE_USAGE_THRES) &&
- (!fuzzy_equal(qos_ptr->usage_thres, NO_VAL))) {
- if (!job_ptr->prio_factors)
- job_ptr->prio_factors =
- xmalloc(sizeof(priority_factors_object_t));
-
- if (!job_ptr->prio_factors->priority_fs) {
- if (fuzzy_equal(assoc_ptr->usage->usage_efctv, NO_VAL))
- priority_g_set_assoc_usage(assoc_ptr);
- job_ptr->prio_factors->priority_fs =
- priority_g_calc_fs_factor(
- assoc_ptr->usage->usage_efctv,
- (long double)assoc_ptr->usage->
- shares_norm);
+ if (job_ptr->priority == 0) { /* user/admin hold */
+ if ((job_ptr->state_reason != WAIT_HELD) &&
+ (job_ptr->state_reason != WAIT_HELD_USER)) {
+ job_ptr->state_reason = WAIT_HELD;
}
- if (job_ptr->prio_factors->priority_fs < qos_ptr->usage_thres)
- fail_reason = WAIT_QOS_THRES;
+ return ESLURM_JOB_HELD;
}
+ /* Confirm that partition is up and has compatible nodes limits */
+ fail_reason = job_limits_check(&job_ptr);
if (fail_reason != WAIT_NO_REASON) {
last_job_update = now;
xfree(job_ptr->state_desc);
- if (job_ptr->priority == 0) { /* user/admin hold */
- if ((job_ptr->state_reason != WAIT_HELD) &&
- (job_ptr->state_reason != WAIT_HELD_USER)) {
- job_ptr->state_reason = WAIT_HELD;
- }
- return ESLURM_JOB_HELD;
- }
job_ptr->state_reason = fail_reason;
- job_ptr->priority = 1; /* sys hold, move to end of queue */
return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
}
@@ -1230,6 +1291,8 @@
/* info("req: %u-%u, %u", job_ptr->details->min_nodes, */
/* job_ptr->details->max_nodes, part_ptr->max_nodes); */
+ /* On BlueGene systems don't adjust the min/max node limits
+ here. We are working on midplane values. */
if (qos_ptr && (qos_ptr->flags & QOS_FLAG_PART_MIN_NODE))
min_nodes = job_ptr->details->min_nodes;
else
@@ -1282,13 +1345,15 @@
(detail_ptr->preempt_start_time >
(now - slurmctld_conf.kill_wait -
slurmctld_conf.msg_timeout))) {
- /* Job preemption still in progress,
+ /* Job preemption may still be in progress,
* do not preempt any more jobs yet */
error_code = ESLURM_NODES_BUSY;
} else {
_preempt_jobs(preemptee_job_list, &error_code);
- if (error_code == ESLURM_NODES_BUSY)
+ if ((error_code == ESLURM_NODES_BUSY) &&
+ (detail_ptr->preempt_start_time == 0)) {
detail_ptr->preempt_start_time = now;
+ }
}
}
if (error_code) {
@@ -1298,8 +1363,6 @@
job_ptr->job_id);
job_ptr->state_reason = WAIT_PART_NODE_LIMIT;
xfree(job_ptr->state_desc);
- if (job_ptr->priority != 0) /* Move to end of queue */
- job_ptr->priority = 1;
last_job_update = now;
} else if (error_code == ESLURM_NODE_NOT_AVAIL) {
/* Required nodes are down or drained */
@@ -1307,8 +1370,6 @@
job_ptr->job_id);
job_ptr->state_reason = WAIT_NODE_NOT_AVAIL;
xfree(job_ptr->state_desc);
- if (job_ptr->priority != 0) /* Move to end of queue */
- job_ptr->priority = 1;
last_job_update = now;
} else if (error_code == ESLURM_RESERVATION_NOT_USABLE) {
job_ptr->state_reason = WAIT_RESERVATION;
@@ -1339,7 +1400,10 @@
* is for the job when we place it
*/
job_ptr->start_time = job_ptr->time_last_active = now;
- if (job_ptr->time_limit == NO_VAL) {
+ if ((job_ptr->time_limit == NO_VAL) ||
+ ((job_ptr->time_limit > part_ptr->max_time) &&
+ (!qos_ptr || (qos_ptr && !(qos_ptr->flags
+ & QOS_FLAG_PART_TIME_LIMIT))))) {
if (part_ptr->default_time != NO_VAL)
job_ptr->time_limit = part_ptr->default_time;
else
@@ -1376,7 +1440,7 @@
}
select_bitmap = NULL; /* nothing left to free */
allocate_nodes(job_ptr);
- build_node_details(job_ptr);
+ build_node_details(job_ptr, true);
/* This could be set in the select plugin so we want to keep
the flag. */
@@ -1393,6 +1457,7 @@
if (job_ptr->mail_type & MAIL_JOB_BEGIN)
mail_job_info(job_ptr, MAIL_JOB_BEGIN);
+ slurmctld_diag_stats.jobs_started++;
acct_policy_job_begin(job_ptr);
/* If ran with slurmdbd this is handled out of band in the
@@ -1424,12 +1489,12 @@
}
/*
- * _list_find_feature - find an entry in the feature list, see list.h for
+ * list_find_feature - find an entry in the feature list, see list.h for
* documentation
* IN key - is feature name or NULL for all features
* RET 1 if found, 0 otherwise
*/
-static int _list_find_feature(void *feature_entry, void *key)
+extern int list_find_feature(void *feature_entry, void *key)
{
struct features_record *feature_ptr;
@@ -1444,7 +1509,7 @@
/*
* _valid_feature_counts - validate a job's features can be satisfied
- * by the selected nodes (NOTE: does not process XOR operators)
+ * by the selected nodes (NOTE: does not process XOR or XAND operators)
* IN detail_ptr - job details
* IN/OUT node_bitmap - nodes available for use, clear if unusable
* RET true if valid, false otherwise
@@ -1475,16 +1540,17 @@
fatal("list_iterator_create malloc error");
while ((job_feat_ptr = (struct feature_record *)
list_next(job_feat_iter))) {
- feat_ptr = list_find_first(feature_list, _list_find_feature,
+ feat_ptr = list_find_first(feature_list, list_find_feature,
(void *) job_feat_ptr->name);
if (feat_ptr) {
if (last_op == FEATURE_OP_AND)
bit_and(feature_bitmap, feat_ptr->node_bitmap);
- else if (last_op == FEATURE_OP_XOR) {
+ else if (last_op == FEATURE_OP_OR)
+ bit_or(feature_bitmap, feat_ptr->node_bitmap);
+ else { /* FEATURE_OP_XOR or FEATURE_OP_XAND */
*has_xor = true;
bit_or(feature_bitmap, feat_ptr->node_bitmap);
- } else /* FEATURE_OP_OR */
- bit_or(feature_bitmap, feat_ptr->node_bitmap);
+ }
} else { /* feature not found */
if (last_op == FEATURE_OP_AND) {
bit_nclear(feature_bitmap, 0,
@@ -1507,7 +1573,7 @@
if (job_feat_ptr->count == 0)
continue;
feat_ptr = list_find_first(feature_list,
- _list_find_feature,
+ list_find_feature,
(void *)job_feat_ptr->name);
if (!feat_ptr) {
rc = false;
@@ -1670,6 +1736,12 @@
return ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
}
}
+ if ((job_ptr->details->min_nodes == 0) &&
+ (job_ptr->details->max_nodes == 0)) {
+ *node_set_pptr = NULL;
+ *node_set_size = 0;
+ return SLURM_SUCCESS;
+ }
node_set_inx = 0;
node_set_ptr = (struct node_set *)
@@ -1961,8 +2033,9 @@
/*
* build_node_details - sets addresses for allocated nodes
* IN job_ptr - pointer to a job record
+ * IN new_alloc - set if new job allocation, cleared if state recovery
*/
-extern void build_node_details(struct job_record *job_ptr)
+extern void build_node_details(struct job_record *job_ptr, bool new_alloc)
{
hostlist_t host_list = NULL;
struct node_record *node_ptr;
@@ -1983,6 +2056,24 @@
xrealloc(job_ptr->node_addr,
(sizeof(slurm_addr_t) * job_ptr->node_cnt));
+#ifdef HAVE_FRONT_END
+ if (new_alloc) {
+ /* Find available front-end node and assign it to this job */
+ xfree(job_ptr->batch_host);
+ job_ptr->front_end_ptr = assign_front_end(NULL);
+ if (job_ptr->front_end_ptr) {
+ job_ptr->batch_host = xstrdup(job_ptr->
+ front_end_ptr->name);
+ }
+ } else if (job_ptr->batch_host) {
+ /* Reset pointer to this job's front-end node */
+ job_ptr->front_end_ptr = assign_front_end(job_ptr->batch_host);
+ if (!job_ptr->front_end_ptr)
+ xfree(job_ptr->batch_host);
+ }
+#else
+ xfree(job_ptr->batch_host);
+#endif
while ((this_node_name = hostlist_shift(host_list))) {
if ((node_ptr = find_node_record(this_node_name))) {
memcpy(&job_ptr->node_addr[node_inx++],
@@ -1991,6 +2082,8 @@
error("Invalid node %s in JobId=%u",
this_node_name, job_ptr->job_id);
}
+ if (job_ptr->batch_host == NULL)
+ job_ptr->batch_host = xstrdup(this_node_name);
free(this_node_name);
}
hostlist_destroy(host_list);
@@ -2036,10 +2129,12 @@
fatal("list_iterator_create malloc failure");
while ((job_feat_ptr = (struct feature_record *)
list_next(feat_iter))) {
- if ((job_feat_ptr->op_code == FEATURE_OP_XOR) ||
+ if ((job_feat_ptr->op_code == FEATURE_OP_XAND) ||
+ (job_feat_ptr->op_code == FEATURE_OP_XOR) ||
+ (last_op == FEATURE_OP_XAND) ||
(last_op == FEATURE_OP_XOR)) {
feat_ptr = list_find_first(feature_list,
- _list_find_feature,
+ list_find_feature,
(void *)job_feat_ptr->name);
if (feat_ptr &&
bit_super_set(config_ptr->node_bitmap,
@@ -2169,19 +2264,20 @@
host_str = hostlist_ranged_string_xmalloc(kill_hostlist);
#ifdef HAVE_BG
if (job_ptr->job_id != last_job_id) {
- info("Resending TERMINATE_JOB request JobId=%u BPlist=%s",
- job_ptr->job_id, host_str);
+ info("Resending TERMINATE_JOB request JobId=%u Midplanelist=%s",
+ job_ptr->job_id, host_str);
} else {
- debug("Resending TERMINATE_JOB request JobId=%u BPlist=%s",
- job_ptr->job_id, host_str);
+ debug("Resending TERMINATE_JOB request JobId=%u "
+ "Midplanelist=%s",
+ job_ptr->job_id, host_str);
}
#else
if (job_ptr->job_id != last_job_id) {
info("Resending TERMINATE_JOB request JobId=%u Nodelist=%s",
- job_ptr->job_id, host_str);
+ job_ptr->job_id, host_str);
} else {
debug("Resending TERMINATE_JOB request JobId=%u Nodelist=%s",
- job_ptr->job_id, host_str);
+ job_ptr->job_id, host_str);
}
#endif
xfree(host_str);
diff --git a/src/slurmctld/node_scheduler.h b/src/slurmctld/node_scheduler.h
index 57abf8f..b164909 100644
--- a/src/slurmctld/node_scheduler.h
+++ b/src/slurmctld/node_scheduler.h
@@ -50,8 +50,9 @@
/*
* build_node_details - sets addresses for allocated nodes
* IN job_ptr - pointer to a job record
+ * IN new_alloc - set if new job allocation, cleared if state recovery
*/
-extern void build_node_details(struct job_record *job_ptr);
+extern void build_node_details(struct job_record *job_ptr, bool new_alloc);
/*
* deallocate_nodes - for a given job, deallocate its nodes and make
diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c
index a5829f9..ff5bf11 100644
--- a/src/slurmctld/partition_mgr.c
+++ b/src/slurmctld/partition_mgr.c
@@ -550,7 +550,8 @@
if ((flags & PART_FLAG_DEFAULT_CLR) ||
(flags & PART_FLAG_HIDDEN_CLR) ||
(flags & PART_FLAG_NO_ROOT_CLR) ||
- (flags & PART_FLAG_ROOT_ONLY_CLR)) {
+ (flags & PART_FLAG_ROOT_ONLY_CLR) ||
+ (flags & PART_FLAG_REQ_RESV_CLR)) {
error("Invalid data for partition %s: flags=%u",
part_name, flags);
error_code = EINVAL;
@@ -1159,6 +1160,16 @@
part_ptr->flags &= (~PART_FLAG_HIDDEN);
}
+ if (part_desc->flags & PART_FLAG_REQ_RESV) {
+ info("update_part: setting req_resv for partition %s",
+ part_desc->name);
+ part_ptr->flags |= PART_FLAG_REQ_RESV;
+ } else if (part_desc->flags & PART_FLAG_REQ_RESV_CLR) {
+ info("update_part: clearing req_resv for partition %s",
+ part_desc->name);
+ part_ptr->flags &= (~PART_FLAG_REQ_RESV);
+ }
+
if (part_desc->flags & PART_FLAG_ROOT_ONLY) {
info("update_part: setting root_only for partition %s",
part_desc->name);
@@ -1375,7 +1386,6 @@
if (error_code == SLURM_SUCCESS) {
slurm_sched_partition_change(); /* notify sched plugin */
select_g_reconfigure(); /* notify select plugin too */
- reset_job_priority(); /* free jobs */
}
return error_code;
@@ -1429,10 +1439,10 @@
status=hostlist_find(hl,alloc_node);
hostlist_destroy(hl);
- if(status==-1)
- status=0;
+ if (status == -1)
+ status = 0;
else
- status=1;
+ status = 1;
return status;
}
@@ -1577,7 +1587,40 @@
slurm_sched_partition_change(); /* notify sched plugin */
select_g_reconfigure(); /* notify select plugin too */
- reset_job_priority(); /* free jobs */
return SLURM_SUCCESS;
}
+
+/*
+ * Determine of the specified job can execute right now or is currently
+ * blocked by a miscellaneous limit. This does not re-validate job state,
+ * but relies upon schedule() in src/slurmctld/job_scheduler.c to do so.
+ */
+extern bool misc_policy_job_runnable_state(struct job_record *job_ptr)
+{
+ if ((job_ptr->state_reason == FAIL_ACCOUNT) ||
+ (job_ptr->state_reason == FAIL_QOS) ||
+ (job_ptr->state_reason == WAIT_NODE_NOT_AVAIL)) {
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Determine of the specified job can execute right now or is currently
+ * blocked by a partition state or limit. Execute job_limits_check() to
+ * re-validate job state.
+ */
+extern bool part_policy_job_runnable_state(struct job_record *job_ptr)
+{
+ if ((job_ptr->state_reason == WAIT_PART_DOWN) ||
+ (job_ptr->state_reason == WAIT_PART_INACTIVE) ||
+ (job_ptr->state_reason == WAIT_PART_NODE_LIMIT) ||
+ (job_ptr->state_reason == WAIT_PART_TIME_LIMIT) ||
+ (job_ptr->state_reason == WAIT_QOS_THRES)) {
+ return false;
+ }
+
+ return true;
+}
diff --git a/src/slurmctld/power_save.c b/src/slurmctld/power_save.c
index 755d3df..8d08c4e 100644
--- a/src/slurmctld/power_save.c
+++ b/src/slurmctld/power_save.c
@@ -296,6 +296,7 @@
int i;
char program[1024], arg0[1024], arg1[1024], *pname;
pid_t child;
+ slurm_ctl_conf_t *ctlconf;
if (prog == NULL) /* disabled, useful for testing */
return -1;
@@ -318,6 +319,9 @@
#else
setpgrp();
#endif
+ ctlconf = slurm_conf_lock();
+ setenv("SLURM_CONF", ctlconf->slurm_conf, 1);
+ slurm_conf_unlock();
execl(program, arg0, arg1, NULL);
exit(1);
} else if (child < 0) {
diff --git a/src/slurmctld/preempt.c b/src/slurmctld/preempt.c
index fa51a05..59cc11a 100644
--- a/src/slurmctld/preempt.c
+++ b/src/slurmctld/preempt.c
@@ -254,6 +254,12 @@
int retval = SLURM_SUCCESS;
char *preempt_type = NULL;
+ /* This function is called frequently, so it should be as fast as
+ * possible. The test below will be TRUE almost all of the time and
+ * is as fast as possible. */
+ if (g_preempt_context)
+ return retval;
+
slurm_mutex_lock(&g_preempt_context_lock);
if (g_preempt_context)
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index e58a9d2..8623791 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -129,6 +129,7 @@
inline static void _slurm_rpc_job_alloc_info(slurm_msg_t * msg);
inline static void _slurm_rpc_job_alloc_info_lite(slurm_msg_t * msg);
inline static void _slurm_rpc_ping(slurm_msg_t * msg);
+inline static void _slurm_rpc_reboot_nodes(slurm_msg_t * msg);
inline static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg);
inline static void _slurm_rpc_resv_create(slurm_msg_t * msg);
inline static void _slurm_rpc_resv_update(slurm_msg_t * msg);
@@ -156,10 +157,12 @@
inline static void _slurm_rpc_update_node(slurm_msg_t * msg);
inline static void _slurm_rpc_update_partition(slurm_msg_t * msg);
inline static void _slurm_rpc_update_block(slurm_msg_t * msg);
-inline static void _slurm_rpc_dump_spank(slurm_msg_t * msg);
+inline static void _slurm_rpc_dump_spank(slurm_msg_t * msg);
+inline static void _slurm_rpc_dump_stats(slurm_msg_t * msg);
inline static void _update_cred_key(void);
+extern diag_stats_t slurmctld_diag_stats;
/*
* slurmctld_req - Process an individual RPC request
@@ -416,7 +419,7 @@
break;
case ACCOUNTING_REGISTER_CTLD:
_slurm_rpc_accounting_register_ctld(msg);
- /* No body to free */
+ slurm_free_reboot_msg(msg->data);
break;
case REQUEST_TOPO_INFO:
_slurm_rpc_get_topo(msg);
@@ -426,6 +429,14 @@
_slurm_rpc_dump_spank(msg);
slurm_free_spank_env_request_msg(msg->data);
break;
+ case REQUEST_REBOOT_NODES:
+ _slurm_rpc_reboot_nodes(msg);
+ /* No body to free */
+ break;
+ case REQUEST_STATS_INFO:
+ _slurm_rpc_dump_stats(msg);
+ slurm_free_stats_info_request_msg(msg->data);
+ break;
default:
error("invalid RPC msg_type=%d", msg->msg_type);
slurm_send_rc_msg(msg, EINVAL);
@@ -542,6 +553,7 @@
conf_ptr->priority_decay_hl = conf->priority_decay_hl;
conf_ptr->priority_calc_period = conf->priority_calc_period;
conf_ptr->priority_favor_small= conf->priority_favor_small;
+ conf_ptr->priority_flags = conf->priority_flags;
conf_ptr->priority_max_age = conf->priority_max_age;
conf_ptr->priority_reset_period = conf->priority_reset_period;
conf_ptr->priority_type = xstrdup(conf->priority_type);
@@ -561,6 +573,8 @@
conf_ptr->propagate_rlimits_except = xstrdup(conf->
propagate_rlimits_except);
+ conf_ptr->reboot_program = xstrdup(conf->reboot_program);
+ conf_ptr->reconfig_flags = conf->reconfig_flags;
conf_ptr->resume_program = xstrdup(conf->resume_program);
conf_ptr->resume_rate = conf->resume_rate;
conf_ptr->resume_timeout = conf->resume_timeout;
@@ -847,6 +861,7 @@
alloc_msg.job_id = job_ptr->job_id;
alloc_msg.node_cnt = job_ptr->node_cnt;
alloc_msg.node_list = xstrdup(job_ptr->nodes);
+ alloc_msg.alias_list = xstrdup(job_ptr->alias_list);
alloc_msg.select_jobinfo =
select_g_select_jobinfo_copy(job_ptr->select_jobinfo);
if (job_ptr->details) {
@@ -1045,12 +1060,11 @@
slurm_msg_t response_msg;
uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
- req_msg->uid = uid;
START_TIMER;
debug2("Processing RPC: REQUEST_PRIORITY_FACTORS from uid=%d", uid);
resp_msg.priority_factors_list = priority_g_get_priority_factors_list(
- req_msg);
+ req_msg, uid);
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
@@ -1342,6 +1356,7 @@
if (job_step_kill_msg->signal == SIGKILL) {
info("sched: Cancel of JobId=%u by UID=%u, %s",
job_step_kill_msg->job_id, uid, TIME_STR);
+ slurmctld_diag_stats.jobs_canceled++;
} else {
info("Signal %u of JobId=%u by UID=%u, %s",
job_step_kill_msg->signal,
@@ -1542,6 +1557,7 @@
comp_msg->job_id,
msg_title, nodes,
slurm_strerror(comp_msg->slurm_rc));
+ slurmctld_diag_stats.jobs_failed++;
if (error_code == SLURM_SUCCESS) {
#ifdef HAVE_BG
if (job_ptr) {
@@ -1610,6 +1626,7 @@
debug2("_slurm_rpc_complete_batch_script JobId=%u %s",
comp_msg->job_id, TIME_STR);
slurm_send_rc_msg(msg, SLURM_SUCCESS);
+ slurmctld_diag_stats.jobs_completed++;
dump_job = true;
}
if (dump_job)
@@ -1773,6 +1790,27 @@
}
}
+static bool _is_valid_will_run_user(job_desc_msg_t *job_desc_msg, uid_t uid)
+{
+ char *account = "";
+
+ if ((uid == job_desc_msg->user_id) || validate_operator(uid))
+ return true;
+
+ if (job_desc_msg->job_id != NO_VAL) {
+ struct job_record *job_ptr;
+ job_ptr = find_job_record(job_desc_msg->job_id);
+ if (job_ptr)
+ account = job_ptr->account;
+ } else if (job_desc_msg->account) {
+ account = job_desc_msg->account;
+ }
+ if (assoc_mgr_is_user_acct_coord(acct_db_conn, uid, account))
+ return true;
+
+ return false;
+}
+
/* _slurm_rpc_job_will_run - process RPC to determine if job with given
* configuration can be initiated */
static void _slurm_rpc_job_will_run(slurm_msg_t * msg)
@@ -1780,7 +1818,7 @@
/* init */
DEF_TIMERS;
int error_code = SLURM_SUCCESS;
- struct job_record *job_ptr;
+ struct job_record *job_ptr = NULL;
job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data;
/* Locks: Write job, read node, read partition */
slurmctld_lock_t job_write_lock = {
@@ -1794,9 +1832,7 @@
debug2("Processing RPC: REQUEST_JOB_WILL_RUN from uid=%d", uid);
/* do RPC call */
- if ( (uid != job_desc_msg->user_id) && (!validate_operator(uid)) &&
- !assoc_mgr_is_user_acct_coord(acct_db_conn, uid,
- job_ptr->account) ) {
+ if (!_is_valid_will_run_user(job_desc_msg, uid)) {
error_code = ESLURM_USER_ID_MISSING;
error("Security violation, JOB_WILL_RUN RPC from uid=%d", uid);
}
@@ -2071,6 +2107,7 @@
job_info_resp_msg.job_id = job_info_msg->job_id;
job_info_resp_msg.node_cnt = job_ptr->node_cnt;
job_info_resp_msg.node_list = xstrdup(job_ptr->nodes);
+ job_info_resp_msg.alias_list = xstrdup(job_ptr->alias_list);
job_info_resp_msg.select_jobinfo =
select_g_select_jobinfo_copy(job_ptr->select_jobinfo);
unlock_slurmctld(job_read_lock);
@@ -2121,8 +2158,11 @@
job_info_msg->job_id, uid,
slurm_strerror(error_code));
slurm_send_rc_msg(msg, error_code);
- } else if ((sbcast_cred = create_sbcast_cred(slurmctld_config.cred_ctx,
- job_ptr->job_id, job_ptr->nodes)) == NULL) {
+ } else if ((sbcast_cred =
+ create_sbcast_cred(slurmctld_config.cred_ctx,
+ job_ptr->job_id,
+ job_ptr->nodes,
+ job_ptr->end_time)) == NULL){
unlock_slurmctld(job_read_lock);
error("_slurm_rpc_job_sbcast_cred JobId=%u cred create error",
job_info_msg->job_id);
@@ -2620,6 +2660,8 @@
false, NULL, 0, uid, &job_ptr);
unlock_slurmctld(job_write_lock);
END_TIMER2("_slurm_rpc_submit_batch_job");
+ if (job_desc_msg->immediate && (error_code != SLURM_SUCCESS))
+ error_code = ESLURM_CAN_NOT_START_IMMEDIATELY;
}
/* return result */
@@ -3634,6 +3676,7 @@
launch_msg_ptr->gid = job_ptr->group_id;
launch_msg_ptr->uid = uid;
launch_msg_ptr->nodes = xstrdup(job_ptr->nodes);
+ launch_msg_ptr->nodes = xstrdup(job_ptr->alias_list);
launch_msg_ptr->restart_cnt = job_ptr->restart_cnt;
if (job_ptr->details) {
launch_msg_ptr->pn_min_memory = job_ptr->details->
@@ -4046,6 +4089,70 @@
slurm_send_rc_msg(msg, rc);
}
+/* _slurm_rpc_reboot_nodes - process RPC to schedule nodes reboot */
+inline static void _slurm_rpc_reboot_nodes(slurm_msg_t * msg)
+{
+ int rc;
+ uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
+#ifndef HAVE_FRONT_END
+ int i;
+ struct node_record *node_ptr;
+ reboot_msg_t *reboot_msg = (reboot_msg_t *)msg->data;
+ char *nodelist = NULL;
+ bitstr_t *bitmap = NULL;
+ /* Locks: write node lock */
+ slurmctld_lock_t node_write_lock = {
+ NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK };
+#endif
+ DEF_TIMERS;
+
+ START_TIMER;
+ debug2("Processing RPC: REQUEST_REBOOT_NODES from uid=%d", uid);
+ if (!validate_super_user(uid)) {
+ error("Security violation, REBOOT_NODES RPC from uid=%d", uid);
+ slurm_send_rc_msg(msg, EACCES);
+ return;
+ }
+#ifdef HAVE_FRONT_END
+ rc = ESLURM_NOT_SUPPORTED;
+#else
+ /* do RPC call */
+ if (reboot_msg)
+ nodelist = reboot_msg->node_list;
+ if (!nodelist || !strcasecmp(nodelist, "ALL")) {
+ bitmap = bit_alloc(node_record_count);
+ if (!bitmap)
+ fatal("malloc failure");
+ bit_nset(bitmap, 0, (node_record_count - 1));
+ } else if (node_name2bitmap(nodelist, false, &bitmap) != 0) {
+ FREE_NULL_BITMAP(bitmap);
+ error("Invalid node list in REBOOT_NODES request");
+ slurm_send_rc_msg(msg, ESLURM_INVALID_NODE_NAME);
+ return;
+ }
+
+ lock_slurmctld(node_write_lock);
+ for (i = 0, node_ptr = node_record_table_ptr;
+ i < node_record_count; i++, node_ptr++) {
+ if (bit_test(bitmap, i) == 0)
+ continue;
+ if (IS_NODE_MAINT(node_ptr)) /* already on maintenance */
+ continue;
+ if (IS_NODE_FUTURE(node_ptr) || IS_NODE_DOWN(node_ptr))
+ continue;
+ if (IS_NODE_CLOUD(node_ptr) && IS_NODE_POWER_SAVE(node_ptr))
+ continue;
+ node_ptr->node_state |= NODE_STATE_MAINT;
+ want_nodes_reboot = true;
+ }
+ unlock_slurmctld(node_write_lock);
+ FREE_NULL_BITMAP(bitmap);
+ rc = SLURM_SUCCESS;
+#endif
+ END_TIMER2("_slurm_rpc_reboot_nodes");
+ slurm_send_rc_msg(msg, rc);
+}
+
inline static void _slurm_rpc_accounting_first_reg(slurm_msg_t *msg)
{
uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
@@ -4144,3 +4251,48 @@
slurm_send_node_msg(msg->conn_fd, &response_msg);
slurm_free_spank_env_responce_msg(spank_resp_msg);
}
+
+
+/* _slurm_rpc_dump_stats - process RPC for statistics information */
+static void _slurm_rpc_dump_stats(slurm_msg_t * msg)
+{
+ char *dump;
+ int dump_size;
+ stats_info_request_msg_t *request_msg;
+ slurm_msg_t response_msg;
+ uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
+
+ request_msg = (stats_info_request_msg_t *)msg->data;
+
+ if ((request_msg->command_id == STAT_COMMAND_RESET) &&
+ !validate_slurm_user(uid)) {
+ error("Security violation, MESSAGE_REALTIME_STATS reset "
+ "from uid=%d", uid);
+ slurm_send_rc_msg(msg, ESLURM_ACCESS_DENIED);
+ return;
+ }
+
+ debug("SIM: Processing RPC: MESSAGE_REALTIME_STATS (command: %u)",
+ request_msg->command_id);
+
+ slurm_msg_t_init(&response_msg);
+ response_msg.protocol_version = msg->protocol_version;
+ response_msg.address = msg->address;
+ response_msg.msg_type = RESPONSE_STATS_INFO;
+
+ if (request_msg->command_id == STAT_COMMAND_RESET) {
+ reset_stats(1);
+ pack_all_stat(0, &dump, &dump_size, msg->protocol_version);
+ response_msg.data = dump;
+ response_msg.data_size = dump_size;
+ } else {
+ pack_all_stat(1, &dump, &dump_size, msg->protocol_version);
+ response_msg.data = dump;
+ response_msg.data_size = dump_size;
+ }
+
+ /* send message */
+ slurm_send_node_msg(msg->conn_fd, &response_msg);
+ xfree(dump);
+}
+
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index d851fb3..1301b03 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -71,6 +71,7 @@
#include "src/common/slurm_rlimits_info.h"
#include "src/common/switch.h"
#include "src/common/xstring.h"
+#include "src/common/strnatcmp.h"
#include "src/slurmctld/acct_policy.h"
#include "src/slurmctld/front_end.h"
@@ -121,11 +122,50 @@
#endif
/*
- * _reorder_node_record_table - order node table in ascending order of node_rank
+ * _reorder_nodes_by_name - order node table in ascending order of name
+ */
+static void _reorder_nodes_by_name(void)
+{
+ struct node_record *node_ptr, *node_ptr2;
+ int i, j, min_inx;
+
+ /* Now we need to sort the node records */
+ for (i = 0; i < node_record_count; i++) {
+ min_inx = i;
+ for (j = i + 1; j < node_record_count; j++) {
+ if (strnatcmp(node_record_table_ptr[j].name,
+ node_record_table_ptr[min_inx].name) < 0)
+ min_inx = j;
+ }
+
+ if (min_inx != i) { /* swap records */
+ struct node_record node_record_tmp;
+
+ j = sizeof(struct node_record);
+ node_ptr = node_record_table_ptr + i;
+ node_ptr2 = node_record_table_ptr + min_inx;
+
+ memcpy(&node_record_tmp, node_ptr, j);
+ memcpy(node_ptr, node_ptr2, j);
+ memcpy(node_ptr2, &node_record_tmp, j);
+ }
+ }
+
+#if _DEBUG
+ /* Log the results */
+ for (i=0, node_ptr = node_record_table_ptr; i < node_record_count;
+ i++, node_ptr++) {
+ info("node_rank[%d]: %s", i, node_ptr->name);
+ }
+#endif
+}
+
+/*
+ * _reorder_nodes_by_rank - order node table in ascending order of node_rank
* This depends on the TopologyPlugin and/or SelectPlugin, which may generate
* such a ranking.
*/
-static void _reorder_node_record_table(void)
+static void _reorder_nodes_by_rank(void)
{
struct node_record *node_ptr, *node_ptr2;
int i, j, min_inx;
@@ -146,7 +186,7 @@
struct node_record node_record_tmp;
j = sizeof(struct node_record);
- node_ptr = node_record_table_ptr + i;
+ node_ptr = node_record_table_ptr + i;
node_ptr2 = node_record_table_ptr + min_inx;
memcpy(&node_record_tmp, node_ptr, j);
@@ -159,7 +199,7 @@
/* Log the results */
for (i=0, node_ptr = node_record_table_ptr; i < node_record_count;
i++, node_ptr++) {
- info("%s: %u", node_ptr->name, node_ptr->node_rank);
+ info("node_rank[%u]: %s", node_ptr->node_rank, node_ptr->name);
}
#endif
}
@@ -494,8 +534,10 @@
xfree(part_ptr->name);
part_ptr->name = xstrdup(part->name);
} else {
- verbose("_parse_part_spec: duplicate entry for partition %s",
- part->name);
+ /* FIXME - maybe should be fatal? */
+ error("_parse_part_spec: duplicate entry for partition %s, "
+ "ignoring", part->name);
+ return EEXIST;
}
if (part->default_flag) {
@@ -514,7 +556,7 @@
if (part->preempt_mode != (uint16_t) NO_VAL)
part_ptr->preempt_mode = part->preempt_mode;
- if(part->disable_root_jobs == (uint16_t)NO_VAL) {
+ if (part->disable_root_jobs == (uint16_t)NO_VAL) {
if (slurmctld_conf.disable_root_jobs)
part_ptr->flags |= PART_FLAG_NO_ROOT;
} else if (part->disable_root_jobs) {
@@ -523,7 +565,7 @@
part_ptr->flags &= (~PART_FLAG_NO_ROOT);
}
- if(part_ptr->flags & PART_FLAG_NO_ROOT)
+ if (part_ptr->flags & PART_FLAG_NO_ROOT)
debug2("partition %s does not allow root jobs", part_ptr->name);
if ((part->default_time != NO_VAL) &&
@@ -537,6 +579,8 @@
part_ptr->flags |= PART_FLAG_HIDDEN;
if (part->root_only_flag)
part_ptr->flags |= PART_FLAG_ROOT_ONLY;
+ if (part->req_resv_flag)
+ part_ptr->flags |= PART_FLAG_REQ_RESV;
part_ptr->max_time = part->max_time;
part_ptr->def_mem_per_cpu = part->def_mem_per_cpu;
part_ptr->default_time = part->default_time;
@@ -593,7 +637,7 @@
cnt_uniq = hostlist_count(hl);
if (cnt_tot != cnt_uniq) {
fatal("Duplicate Nodes for Partition %s",
- part->name);
+ part->name);
}
xfree(part_ptr->nodes);
part_ptr->nodes = hostlist_ranged_string_xmalloc(hl);
@@ -767,7 +811,9 @@
do_reorder_nodes |= select_g_node_ranking(node_record_table_ptr,
node_record_count);
if (do_reorder_nodes)
- _reorder_node_record_table();
+ _reorder_nodes_by_rank();
+ else
+ _reorder_nodes_by_name();
rehash_node();
rehash_jobs();
@@ -780,7 +826,8 @@
old_node_record_count);
error_code = MAX(error_code, rc); /* not fatal */
}
- if (old_part_list && (recover > 1)) {
+ if (old_part_list && ((recover > 1) ||
+ (slurmctld_conf.reconfig_flags & RECONFIG_KEEP_PART_INFO))) {
info("restoring original partition state");
rc = _restore_part_state(old_part_list,
old_def_part_name);
@@ -806,7 +853,6 @@
sync_job_priorities();
}
- sync_front_end_state();
_sync_part_prio();
_build_bitmaps_pre_select();
if ((select_g_node_init(node_record_table_ptr, node_record_count)
@@ -971,16 +1017,35 @@
hs = hostset_create(node_ptr->name);
}
+ if (IS_NODE_CLOUD(node_ptr) && !IS_NODE_POWER_SAVE(node_ptr)) {
+ /* Preserve NodeHostname + NodeAddr set by scontrol */
+ xfree(node_ptr->comm_name);
+ node_ptr->comm_name = old_node_ptr->comm_name;
+ old_node_ptr->comm_name = NULL;
+ xfree(node_ptr->node_hostname);
+ node_ptr->node_hostname = old_node_ptr->node_hostname;
+ old_node_ptr->node_hostname = NULL;
+ slurm_reset_alias(node_ptr->name, node_ptr->comm_name,
+ node_ptr->node_hostname);
+ }
+
node_ptr->last_response = old_node_ptr->last_response;
+
+#ifndef HAVE_BG
+ /* If running on a BlueGene system the cpus never
+ change so just skip this.
+ */
if (old_node_ptr->port != node_ptr->config_ptr->cpus) {
rc = ESLURM_NEED_RESTART;
error("Configured cpu count change on %s (%u to %u)",
node_ptr->name, old_node_ptr->port,
node_ptr->config_ptr->cpus);
}
+#endif
node_ptr->boot_time = old_node_ptr->boot_time;
node_ptr->cpus = old_node_ptr->cpus;
node_ptr->cores = old_node_ptr->cores;
+ node_ptr->last_idle = old_node_ptr->last_idle;
node_ptr->sockets = old_node_ptr->sockets;
node_ptr->threads = old_node_ptr->threads;
node_ptr->real_memory = old_node_ptr->real_memory;
@@ -1150,6 +1215,15 @@
else
part_ptr->flags &= (~PART_FLAG_ROOT_ONLY);
}
+ if ((part_ptr->flags & PART_FLAG_REQ_RESV) !=
+ (old_part_ptr->flags & PART_FLAG_REQ_RESV)) {
+ error("Partition %s ReqResv differs from "
+ "slurm.conf", part_ptr->name);
+ if (old_part_ptr->flags & PART_FLAG_REQ_RESV)
+ part_ptr->flags |= PART_FLAG_REQ_RESV;
+ else
+ part_ptr->flags &= (~PART_FLAG_REQ_RESV);
+ }
if (part_ptr->max_nodes_orig !=
old_part_ptr->max_nodes_orig) {
error("Partition %s MaxNodes differs from "
@@ -1305,6 +1379,7 @@
return gs_fini();
}
+ error("Invalid gang scheduling mode change");
return EINVAL;
}
@@ -1410,9 +1485,11 @@
}
list_iterator_destroy(job_iterator);
- if (update_cnt)
+ if (update_cnt) {
info("_sync_nodes_to_jobs updated state of %d nodes",
update_cnt);
+ }
+ sync_front_end_state();
return update_cnt;
}
diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c
index 440c278..1671640 100644
--- a/src/slurmctld/reservation.c
+++ b/src/slurmctld/reservation.c
@@ -72,6 +72,7 @@
#include "src/slurmctld/licenses.h"
#include "src/slurmctld/locks.h"
+#include "src/slurmctld/reservation.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/state_save.h"
@@ -115,6 +116,9 @@
bool internal);
static bitstr_t *_pick_idle_nodes(bitstr_t *avail_nodes,
resv_desc_msg_t *resv_desc_ptr);
+static bitstr_t *_pick_idle_node_cnt(bitstr_t *avail_bitmap,
+ resv_desc_msg_t *resv_desc_ptr,
+ uint32_t node_cnt);
static int _post_resv_create(slurmctld_resv_t *resv_ptr);
static int _post_resv_delete(slurmctld_resv_t *resv_ptr);
static int _post_resv_update(slurmctld_resv_t *resv_ptr,
@@ -300,7 +304,8 @@
{
char start_str[32] = "-1", end_str[32] = "-1", *flag_str = NULL;
- int duration;
+ char *node_cnt_str = NULL;
+ int duration, i;
if (!(slurm_get_debug_flags() & DEBUG_FLAG_RESERVATION))
return;
@@ -321,15 +326,28 @@
else
duration = resv_ptr->duration;
+ if (resv_ptr->node_cnt) {
+ for (i = 0; resv_ptr->node_cnt[i]; i++) {
+ if (node_cnt_str) {
+ xstrfmtcat(node_cnt_str, ",%u",
+ resv_ptr->node_cnt[i]);
+ } else {
+ xstrfmtcat(node_cnt_str, "%u",
+ resv_ptr->node_cnt[i]);
+ }
+ }
+ }
+
info("%s: Name=%s StartTime=%s EndTime=%s Duration=%d "
- "Flags=%s NodeCnt=%d NodeList=%s Features=%s "
+ "Flags=%s NodeCnt=%s NodeList=%s Features=%s "
"PartitionName=%s Users=%s Accounts=%s Licenses=%s",
mode, resv_ptr->name, start_str, end_str, duration,
- flag_str, resv_ptr->node_cnt, resv_ptr->node_list,
+ flag_str, node_cnt_str, resv_ptr->node_list,
resv_ptr->features, resv_ptr->partition,
resv_ptr->users, resv_ptr->accounts, resv_ptr->licenses);
xfree(flag_str);
+ xfree(node_cnt_str);
}
static void _generate_resv_id(void)
@@ -450,6 +468,8 @@
acct_db_conn, &assoc,
accounting_enforce, assoc_list);
if (rc != SLURM_SUCCESS) {
+ error("No associations for UID %u",
+ assoc.uid);
rc = ESLURM_INVALID_ACCOUNT;
goto end_it;
}
@@ -1171,7 +1191,7 @@
/* Create a resource reservation */
extern int create_resv(resv_desc_msg_t *resv_desc_ptr)
{
- int i, rc = SLURM_SUCCESS;
+ int i, j, rc = SLURM_SUCCESS;
time_t now = time(NULL);
struct part_record *part_ptr = NULL;
bitstr_t *node_bitmap = NULL;
@@ -1182,6 +1202,7 @@
char start_time[32], end_time[32];
List license_list = (List) NULL;
char *name1, *name2, *val1, *val2;
+ uint32_t total_node_cnt = NO_VAL;
if (!resv_list)
resv_list = list_create(_del_resv_rec);
@@ -1218,7 +1239,8 @@
RESERVE_FLAG_IGN_JOBS |
RESERVE_FLAG_DAILY |
RESERVE_FLAG_WEEKLY |
- RESERVE_FLAG_LIC_ONLY;
+ RESERVE_FLAG_LIC_ONLY |
+ RESERVE_FLAG_STATIC;
}
if (resv_desc_ptr->partition) {
part_ptr = find_part_record(resv_desc_ptr->partition);
@@ -1257,9 +1279,25 @@
rc = ESLURM_INVALID_LICENSES;
goto bad_parse;
}
- if ((resv_desc_ptr->node_cnt == NO_VAL) &&
- (resv_desc_ptr->node_list == NULL))
- resv_desc_ptr->node_cnt = 0;
+ }
+
+ /* Sort the list of jobs in descending order */
+ if (resv_desc_ptr->node_cnt) {
+ for (i = 0; resv_desc_ptr->node_cnt[i]; i++) {
+ int max_inx = i;
+ for (j = (i + 1); resv_desc_ptr->node_cnt[j]; j++) {
+ if (resv_desc_ptr->node_cnt[j] >
+ resv_desc_ptr->node_cnt[max_inx])
+ max_inx = j;
+ }
+ if (max_inx != i) { /* swap the values */
+ uint32_t max_val = resv_desc_ptr->
+ node_cnt[max_inx];
+ resv_desc_ptr->node_cnt[max_inx] =
+ resv_desc_ptr->node_cnt[i];
+ resv_desc_ptr->node_cnt[i] = max_val;
+ }
+ }
}
#ifdef HAVE_BG
@@ -1267,10 +1305,33 @@
select_g_alter_node_cnt(SELECT_GET_NODE_SCALING,
&cnodes_per_bp);
}
- if ((resv_desc_ptr->node_cnt != NO_VAL) && cnodes_per_bp) {
+ if (resv_desc_ptr->node_cnt && cnodes_per_bp) {
+ /* Pack multiple small blocks into midplane rather than
+ * allocating a whole midplane for each small block */
+ int small_block_nodes = 0, small_block_count = 0;
+ for (i = 0; resv_desc_ptr->node_cnt[i]; i++) {
+ if (resv_desc_ptr->node_cnt[i] < cnodes_per_bp)
+ small_block_nodes += resv_desc_ptr->node_cnt[i];
+ }
+ small_block_count = small_block_nodes;
+ small_block_count += (cnodes_per_bp - 1);
+ small_block_count /= cnodes_per_bp;
+
/* Convert c-node count to midplane count */
- resv_desc_ptr->node_cnt = (resv_desc_ptr->node_cnt +
- cnodes_per_bp - 1) / cnodes_per_bp;
+ total_node_cnt = 0;
+ for (i = 0; resv_desc_ptr->node_cnt[i]; i++) {
+ if (resv_desc_ptr->node_cnt[i] < cnodes_per_bp) {
+ if (small_block_count == 0) {
+ resv_desc_ptr->node_cnt[i] = 0;
+ break;
+ }
+ small_block_count--;
+ }
+
+ resv_desc_ptr->node_cnt[i] += (cnodes_per_bp - 1);
+ resv_desc_ptr->node_cnt[i] /= cnodes_per_bp;
+ total_node_cnt += resv_desc_ptr->node_cnt[i];
+ }
}
#endif
@@ -1287,8 +1348,6 @@
rc = ESLURM_INVALID_NODE_NAME;
goto bad_parse;
}
- if (resv_desc_ptr->node_cnt == NO_VAL)
- resv_desc_ptr->node_cnt = 0;
if (!(resv_desc_ptr->flags & RESERVE_FLAG_OVERLAP) &&
_resv_overlap(resv_desc_ptr->start_time,
resv_desc_ptr->end_time,
@@ -1298,7 +1357,7 @@
rc = ESLURM_RESERVATION_OVERLAP;
goto bad_parse;
}
- resv_desc_ptr->node_cnt = bit_set_count(node_bitmap);
+ total_node_cnt = bit_set_count(node_bitmap);
if (!(resv_desc_ptr->flags & RESERVE_FLAG_IGN_JOBS) &&
_job_overlap(resv_desc_ptr->start_time,
resv_desc_ptr->flags, node_bitmap)) {
@@ -1306,7 +1365,8 @@
rc = ESLURM_NODES_BUSY;
goto bad_parse;
}
- } else if ((resv_desc_ptr->node_cnt == NO_VAL) &&
+ } else if (((resv_desc_ptr->node_cnt == NULL) ||
+ (resv_desc_ptr->node_cnt[0] == 0)) &&
((resv_desc_ptr->flags & RESERVE_FLAG_LIC_ONLY) == 0)) {
info("Reservation request lacks node specification");
rc = ESLURM_INVALID_NODE_NAME;
@@ -1314,6 +1374,10 @@
} else if ((rc = _select_nodes(resv_desc_ptr, &part_ptr, &node_bitmap))
!= SLURM_SUCCESS) {
goto bad_parse;
+ } else {
+ /* Get count of allocated nodes, on BlueGene systems, this
+ * might be more than requested */
+ total_node_cnt = bit_set_count(node_bitmap);
}
_generate_resv_id();
@@ -1355,7 +1419,7 @@
resv_ptr->resv_id = top_suffix;
xassert(resv_ptr->magic = RESV_MAGIC); /* Sets value */
resv_ptr->name = xstrdup(resv_desc_ptr->name);
- resv_ptr->node_cnt = resv_desc_ptr->node_cnt;
+ resv_ptr->node_cnt = total_node_cnt;
resv_ptr->node_list = resv_desc_ptr->node_list;
resv_desc_ptr->node_list = NULL; /* Nothing left to free */
resv_ptr->node_bitmap = node_bitmap; /* May be unset */
@@ -1424,7 +1488,7 @@
{
time_t now = time(NULL);
slurmctld_resv_t *resv_backup, *resv_ptr;
- int error_code = SLURM_SUCCESS, rc;
+ int error_code = SLURM_SUCCESS, i, rc;
char start_time[32], end_time[32];
char *name1, *name2, *val1, *val2;
@@ -1467,6 +1531,10 @@
resv_ptr->flags |= RESERVE_FLAG_LIC_ONLY;
if (resv_desc_ptr->flags & RESERVE_FLAG_NO_LIC_ONLY)
resv_ptr->flags &= (~RESERVE_FLAG_LIC_ONLY);
+ if (resv_desc_ptr->flags & RESERVE_FLAG_STATIC)
+ resv_ptr->flags |= RESERVE_FLAG_STATIC;
+ if (resv_desc_ptr->flags & RESERVE_FLAG_NO_STATIC)
+ resv_ptr->flags &= (~RESERVE_FLAG_STATIC);
}
if (resv_desc_ptr->partition && (resv_desc_ptr->partition[0] == '\0')){
/* Clear the partition */
@@ -1496,8 +1564,9 @@
}
}
if (resv_desc_ptr->licenses && (resv_desc_ptr->licenses[0] == '\0')) {
- if ((resv_desc_ptr->node_cnt == 0) ||
- ((resv_desc_ptr->node_cnt == NO_VAL) &&
+ if (((resv_desc_ptr->node_cnt != NULL) &&
+ (resv_desc_ptr->node_cnt[0] == 0)) ||
+ ((resv_desc_ptr->node_cnt == NULL) &&
(resv_ptr->node_cnt == 0))) {
info("Reservation attempt to clear licenses with "
"NodeCount=0");
@@ -1594,8 +1663,12 @@
xfree(resv_ptr->node_list);
FREE_NULL_BITMAP(resv_ptr->node_bitmap);
resv_ptr->node_bitmap = bit_alloc(node_record_count);
- if (resv_desc_ptr->node_cnt == NO_VAL)
- resv_desc_ptr->node_cnt = resv_ptr->node_cnt;
+ if ((resv_desc_ptr->node_cnt == NULL) ||
+ (resv_desc_ptr->node_cnt[0] == 0)) {
+ xrealloc(resv_desc_ptr->node_cnt, sizeof(uint32_t) * 2);
+ resv_desc_ptr->node_cnt[0] = resv_ptr->node_cnt;
+ resv_desc_ptr->node_cnt[1] = 0;
+ }
resv_ptr->node_cnt = 0;
}
if (resv_desc_ptr->node_list) { /* Change bitmap last */
@@ -1616,7 +1689,9 @@
resv_ptr->node_bitmap = node_bitmap;
resv_ptr->node_cnt = bit_set_count(resv_ptr->node_bitmap);
}
- if (resv_desc_ptr->node_cnt != NO_VAL) {
+ if (resv_desc_ptr->node_cnt) {
+ uint32_t total_node_cnt = 0;
+
#ifdef HAVE_BG
if (!cnodes_per_bp) {
select_g_alter_node_cnt(SELECT_GET_NODE_SCALING,
@@ -1624,12 +1699,16 @@
}
if (cnodes_per_bp) {
/* Convert c-node count to midplane count */
- resv_desc_ptr->node_cnt = (resv_desc_ptr->node_cnt +
- cnodes_per_bp - 1) /
- cnodes_per_bp;
+ for (i = 0; resv_desc_ptr->node_cnt[i]; i++) {
+ resv_desc_ptr->node_cnt[i] += cnodes_per_bp - 1;
+ resv_desc_ptr->node_cnt[i] /= cnodes_per_bp;
+ }
}
#endif
- rc = _resize_resv(resv_ptr, resv_desc_ptr->node_cnt);
+ for (i = 0; resv_desc_ptr->node_cnt[i]; i++) {
+ total_node_cnt += resv_desc_ptr->node_cnt[i];
+ }
+ rc = _resize_resv(resv_ptr, total_node_cnt);
if (rc) {
error_code = rc;
goto update_failure;
@@ -1675,6 +1754,7 @@
_post_resv_update(resv_ptr, resv_backup);
_del_resv_rec(resv_backup);
+ set_node_maint_mode(true);
last_resv_update = now;
schedule_resv_save();
return error_code;
@@ -2067,7 +2147,8 @@
int i;
resv_desc_msg_t resv_desc;
- if (resv_ptr->flags & RESERVE_FLAG_SPEC_NODES)
+ if (resv_ptr->flags & RESERVE_FLAG_SPEC_NODES ||
+ resv_ptr->flags & RESERVE_FLAG_STATIC)
return;
i = bit_overlap(resv_ptr->node_bitmap, avail_node_bitmap);
@@ -2081,8 +2162,10 @@
resv_desc.start_time = resv_ptr->start_time;
resv_desc.end_time = resv_ptr->end_time;
resv_desc.features = resv_ptr->features;
- resv_desc.node_cnt = resv_ptr->node_cnt - i;
+ resv_desc.node_cnt = xmalloc(sizeof(uint32_t) * 2);
+ resv_desc.node_cnt[0]= resv_ptr->node_cnt - i;
i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &tmp_bitmap);
+ xfree(resv_desc.node_cnt);
xfree(resv_desc.node_list);
xfree(resv_desc.partition);
if (i == SLURM_SUCCESS) {
@@ -2299,6 +2382,7 @@
job_ptr->resv_id = resv_ptr->resv_id;
job_ptr->resv_flags = resv_ptr->flags;
job_ptr->resv_ptr = resv_ptr;
+ _validate_node_choice(resv_ptr);
}
return rc;
}
@@ -2359,8 +2443,10 @@
resv_desc.end_time = resv_ptr->end_time;
resv_desc.features = resv_ptr->features;
resv_desc.flags = resv_ptr->flags;
- resv_desc.node_cnt = 0 - delta_node_cnt;
+ resv_desc.node_cnt = xmalloc(sizeof(uint32_t) * 2);
+ resv_desc.node_cnt[0]= 0 - delta_node_cnt;
i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &tmp1_bitmap);
+ xfree(resv_desc.node_cnt);
xfree(resv_desc.node_list);
xfree(resv_desc.partition);
if (i == SLURM_SUCCESS) {
@@ -2505,22 +2591,97 @@
static bitstr_t *_pick_idle_nodes(bitstr_t *avail_bitmap,
resv_desc_msg_t *resv_desc_ptr)
{
+ int i;
+ bitstr_t *ret_bitmap = NULL, *tmp_bitmap;
+ uint32_t total_node_cnt = 0;
+ bool resv_debug;
+#ifdef HAVE_BG
+ static uint16_t static_blocks = (uint16_t)NO_VAL;
+ if (static_blocks == (uint16_t)NO_VAL) {
+ /* Since this never changes we can just set it once
+ * and not look at it again. */
+ select_g_get_info_from_plugin(SELECT_STATIC_PART, NULL,
+ &static_blocks);
+ }
+#else
+ static uint16_t static_blocks = 0;
+#endif
+
+ if (resv_desc_ptr->node_cnt == NULL) {
+ return _pick_idle_node_cnt(avail_bitmap, resv_desc_ptr, 0);
+ } else if ((resv_desc_ptr->node_cnt[0] == 0) ||
+ (resv_desc_ptr->node_cnt[1] == 0)) {
+ return _pick_idle_node_cnt(avail_bitmap, resv_desc_ptr,
+ resv_desc_ptr->node_cnt[0]);
+ }
+
+ /* Try to create a single reservation that can contain all blocks
+ * unless we have static blocks on a BlueGene system */
+ if (static_blocks != 0) {
+ for (i = 0; resv_desc_ptr->node_cnt[i]; i++)
+ total_node_cnt += resv_desc_ptr->node_cnt[i];
+ tmp_bitmap = _pick_idle_node_cnt(avail_bitmap, resv_desc_ptr,
+ total_node_cnt);
+ if (tmp_bitmap) {
+ if (total_node_cnt == bit_set_count(tmp_bitmap))
+ return tmp_bitmap;
+ /* Oversized allocation, possibly due to BlueGene block
+ * size limitations. Need to create as multiple
+ * blocks */
+ FREE_NULL_BITMAP(tmp_bitmap);
+ }
+ }
+
+ /* Need to create reservation containing multiple blocks */
+ resv_debug = slurm_get_debug_flags() & DEBUG_FLAG_RESERVATION;
+ for (i = 0; resv_desc_ptr->node_cnt[i]; i++) {
+ tmp_bitmap = _pick_idle_node_cnt(avail_bitmap, resv_desc_ptr,
+ resv_desc_ptr->node_cnt[i]);
+ if (tmp_bitmap == NULL) { /* allocation failure */
+ if (resv_debug) {
+ info("reservation of %u nodes failed",
+ resv_desc_ptr->node_cnt[i]);
+ }
+ FREE_NULL_BITMAP(ret_bitmap);
+ return NULL;
+ }
+ if (resv_debug) {
+ char *tmp_name;
+ tmp_name = bitmap2node_name(tmp_bitmap);
+ info("reservation of %u nodes, using %s",
+ resv_desc_ptr->node_cnt[i], tmp_name);
+ xfree(tmp_name);
+ }
+ if (ret_bitmap)
+ bit_or(ret_bitmap, tmp_bitmap);
+ else
+ ret_bitmap = bit_copy(tmp_bitmap);
+ bit_not(tmp_bitmap);
+ bit_and(avail_bitmap, tmp_bitmap);
+ FREE_NULL_BITMAP(tmp_bitmap);
+ }
+
+ return ret_bitmap;
+}
+
+static bitstr_t *_pick_idle_node_cnt(bitstr_t *avail_bitmap,
+ resv_desc_msg_t *resv_desc_ptr,
+ uint32_t node_cnt)
+{
ListIterator job_iterator;
struct job_record *job_ptr;
bitstr_t *save_bitmap, *ret_bitmap, *tmp_bitmap;
- if (bit_set_count(avail_bitmap) < resv_desc_ptr->node_cnt) {
+ if (bit_set_count(avail_bitmap) < node_cnt) {
verbose("reservation requests more nodes than are available");
return NULL;
}
save_bitmap = bit_copy(avail_bitmap);
/* First: Try to reserve nodes that are currently IDLE */
- if (bit_overlap(avail_bitmap, idle_node_bitmap) >=
- resv_desc_ptr->node_cnt) {
+ if (bit_overlap(avail_bitmap, idle_node_bitmap) >= node_cnt) {
bit_and(avail_bitmap, idle_node_bitmap);
- ret_bitmap = select_g_resv_test(avail_bitmap,
- resv_desc_ptr->node_cnt);
+ ret_bitmap = select_g_resv_test(avail_bitmap, node_cnt);
if (ret_bitmap)
goto fini;
}
@@ -2540,7 +2701,7 @@
bit_not(job_ptr->node_bitmap);
}
list_iterator_destroy(job_iterator);
- ret_bitmap = select_g_resv_test(avail_bitmap, resv_desc_ptr->node_cnt);
+ ret_bitmap = select_g_resv_test(avail_bitmap, node_cnt);
if (ret_bitmap)
goto fini;
@@ -2564,7 +2725,6 @@
if (bit_set_count(tmp_bitmap) > 0) {
bit_or(avail_bitmap, tmp_bitmap);
ret_bitmap = select_g_resv_test(avail_bitmap,
- resv_desc_ptr->
node_cnt);
}
FREE_NULL_BITMAP(tmp_bitmap);
@@ -3126,7 +3286,7 @@
/* Set or clear NODE_STATE_MAINT for node_state as needed */
-extern void set_node_maint_mode(void)
+extern void set_node_maint_mode(bool reset_all)
{
ListIterator iter;
slurmctld_resv_t *resv_ptr;
@@ -3135,10 +3295,21 @@
if (!resv_list)
return;
+ if (reset_all) {
+ int i;
+ struct node_record *node_ptr;
+ for (i = 0, node_ptr = node_record_table_ptr;
+ i < node_record_count;
+ i++, node_ptr++) {
+ node_ptr->node_state &= (~NODE_STATE_MAINT);
+ }
+ }
iter = list_iterator_create(resv_list);
if (!iter)
fatal("malloc: list_iterator_create");
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
+ if (reset_all)
+ resv_ptr->maint_set_node = false;
if ((resv_ptr->flags & RESERVE_FLAG_MAINT) == 0)
continue;
if ((now >= resv_ptr->start_time) &&
@@ -3157,6 +3328,32 @@
list_iterator_destroy(iter);
}
+/* checks if node within node_record_table_ptr is in maint reservation */
+extern bool is_node_in_maint_reservation(int nodenum)
+{
+ bool res = false;
+ ListIterator iter;
+ slurmctld_resv_t *resv_ptr;
+
+ if (nodenum < 0 || nodenum >= node_record_count || !resv_list)
+ return false;
+
+ iter = list_iterator_create(resv_list);
+ if (!iter)
+ fatal("malloc: list_iterator_create");
+ while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
+ if ((resv_ptr->flags & RESERVE_FLAG_MAINT) == 0)
+ continue;
+ if (bit_test(resv_ptr->node_bitmap, nodenum)) {
+ res = true;
+ break;
+ }
+ }
+ list_iterator_destroy(iter);
+
+ return res;
+}
+
extern void update_assocs_in_resvs(void)
{
slurmctld_resv_t *resv_ptr = NULL;
diff --git a/src/slurmctld/reservation.h b/src/slurmctld/reservation.h
index 27128d0..2ebb63f 100644
--- a/src/slurmctld/reservation.h
+++ b/src/slurmctld/reservation.h
@@ -71,8 +71,13 @@
*/
extern int send_resvs_to_accounting(void);
-/* Set or clear NODE_STATE_MAINT for node_state as needed */
-extern void set_node_maint_mode(void);
+/* Set or clear NODE_STATE_MAINT for node_state as needed
+ * IN reset_all - re-initialize all node information for all reservations
+ */
+extern void set_node_maint_mode(bool reset_all);
+
+/* checks if node within node_record_table_ptr is in maint reservation */
+extern bool is_node_in_maint_reservation(int nodenum);
/* After an assocation has been added or removed update the lists. */
extern void update_assocs_in_resvs(void);
diff --git a/src/slurmctld/sched_plugin.c b/src/slurmctld/sched_plugin.c
index 55d8908..d508ed0 100644
--- a/src/slurmctld/sched_plugin.c
+++ b/src/slurmctld/sched_plugin.c
@@ -83,7 +83,7 @@
static slurm_sched_context_t *g_sched_context = NULL;
static pthread_mutex_t g_sched_context_lock = PTHREAD_MUTEX_INITIALIZER;
-
+static bool init_run = false;
/* ************************************************************************ */
/* TAG( slurm_sched_get_ops ) */
@@ -225,6 +225,9 @@
int retval = SLURM_SUCCESS;
char *sched_type = NULL;
+ if ( init_run && g_sched_context )
+ return retval;
+
slurm_mutex_lock( &g_sched_context_lock );
if ( g_sched_context )
@@ -250,6 +253,7 @@
if ( (slurm_get_preempt_mode() & PREEMPT_MODE_GANG) &&
(gs_init() != SLURM_SUCCESS))
error( "cannot start gang scheduler ");
+ init_run = true;
done:
slurm_mutex_unlock( &g_sched_context_lock );
@@ -268,6 +272,7 @@
if (!g_sched_context)
return SLURM_SUCCESS;
+ init_run = false;
rc = slurm_sched_context_destroy(g_sched_context);
g_sched_context = NULL;
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 89402f5..1339ce1 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -169,6 +169,41 @@
#endif
} slurmctld_config_t;
+/* Job scheduling statistics */
+typedef struct diag_stats {
+ int proc_req_threads;
+ int proc_req_raw;
+
+ uint32_t schedule_cycle_max;
+ uint32_t schedule_cycle_last;
+ uint32_t schedule_cycle_sum;
+ uint32_t schedule_cycle_counter;
+ uint32_t schedule_cycle_depth;
+ uint32_t schedule_queue_len;
+
+ uint32_t jobs_submitted;
+ uint32_t jobs_started;
+ uint32_t jobs_completed;
+ uint32_t jobs_canceled;
+ uint32_t jobs_failed;
+
+ uint32_t backfilled_jobs;
+ uint32_t last_backfilled_jobs;
+ uint32_t bf_cycle_counter;
+ uint32_t bf_cycle_last;
+ uint32_t bf_cycle_max;
+ uint32_t bf_cycle_sum;
+ uint32_t bf_last_depth;
+ uint32_t bf_last_depth_try;
+ uint32_t bf_depth_sum;
+ uint32_t bf_depth_try_sum;
+ uint32_t bf_queue_len;
+ uint32_t bf_queue_len_sum;
+ time_t bf_when_last_cycle;
+ uint32_t bf_active;
+} diag_stats_t;
+
+extern diag_stats_t slurmctld_diag_stats;
extern slurmctld_config_t slurmctld_config;
extern int bg_recover; /* state recovery mode */
extern char *slurmctld_cluster_name; /* name of cluster */
@@ -177,12 +212,14 @@
extern int association_based_accounting;
extern uint32_t cluster_cpus;
extern int with_slurmdbd;
+extern bool load_2_4_state;
/*****************************************************************************\
* NODE parameters and data structures, mostly in src/common/node_conf.h
\*****************************************************************************/
extern uint32_t total_cpus; /* count of CPUs in the entire cluster */
extern bool ping_nodes_now; /* if set, ping nodes immediately */
+extern bool want_nodes_reboot; /* if set, check for idle nodes */
/*****************************************************************************\
* NODE states and bitmaps
@@ -332,10 +369,11 @@
#define JOB_MAGIC 0xf0b7392c
#define STEP_MAGIC 0xce593bc1
-#define FEATURE_OP_OR 0
-#define FEATURE_OP_AND 1
-#define FEATURE_OP_XOR 2
-#define FEATURE_OP_END 3 /* last entry lacks separator */
+#define FEATURE_OP_OR 0
+#define FEATURE_OP_AND 1
+#define FEATURE_OP_XOR 2
+#define FEATURE_OP_XAND 3
+#define FEATURE_OP_END 4 /* last entry lacks separator */
struct feature_record {
char *name; /* name of feature */
uint16_t count; /* count of nodes with this feature */
@@ -421,6 +459,7 @@
struct job_record {
char *account; /* account number to charge */
+ char *alias_list; /* node name to address aliases */
char *alloc_node; /* local node making resource alloc */
uint16_t alloc_resp_port; /* RESPONSE_RESOURCE_ALLOCATION port */
uint32_t alloc_sid; /* local sid making resource alloc */
@@ -481,8 +520,12 @@
* a limit false if user set */
uint16_t limit_set_min_nodes; /* if max_nodes was set from
* a limit false if user set */
+ uint16_t limit_set_pn_min_memory; /* if pn_min_memory was set from
+ * a limit false if user set */
uint16_t limit_set_time; /* if time_limit was set from
* a limit false if user set */
+ uint16_t limit_set_qos; /* if qos_limit was set from
+ * a limit false if user set */
uint16_t mail_type; /* see MAIL_JOB_* in slurm.h */
char *mail_user; /* user to get e-mail notification */
uint32_t magic; /* magic cookie for data integrity */
@@ -934,20 +977,20 @@
int allocate, uid_t submit_uid, struct job_record **job_pptr);
/*
- * job_cancel_by_assoc_id - Cancel all pending and running jobs with a given
+ * job_hold_by_assoc_id - Hold all pending jobs with a given
* association ID. This happens when an association is deleted (e.g. when
* a user is removed from the association database).
- * RET count of cancelled jobs
+ * RET count of held jobs
*/
-extern int job_cancel_by_assoc_id(uint32_t assoc_id);
+extern int job_hold_by_assoc_id(uint32_t assoc_id);
/*
- * job_cancel_by_qos_id - Cancel all pending and running jobs with a given
+ * job_hold_by_qos_id - Hold all pending jobs with a given
* QOS ID. This happens when a QOS is deleted (e.g. when
* a QOS is removed from the association database).
- * RET count of cancelled jobs
+ * RET count of held jobs
*/
-extern int job_cancel_by_qos_id(uint32_t qos_id);
+extern int job_hold_by_qos_id(uint32_t qos_id);
/* Perform checkpoint operation on a job */
extern int job_checkpoint(checkpoint_msg_t *ckpt_ptr, uid_t uid,
@@ -1235,6 +1278,14 @@
int list_compare_config (void *config_entry1, void *config_entry2);
/*
+ * list_find_feature - find an entry in the feature list, see list.h for
+ * documentation
+ * IN key - is feature name or NULL for all features
+ * RET 1 if found, 0 otherwise
+ */
+extern int list_find_feature(void *feature_entry, void *key);
+
+/*
* list_find_part - find an entry in the partition list, see common/list.h
* for documentation
* IN key - partition name or "universal_key" for all partitions
@@ -1312,6 +1363,13 @@
extern void make_node_idle(struct node_record *node_ptr,
struct job_record *job_ptr);
+/*
+ * Determine if the specified job can execute right now or is currently
+ * blocked by a miscellaneous limit. This does not re-validate job state,
+ * but relies upon schedule() in src/slurmctld/job_scheduler.c to do so.
+ */
+extern bool misc_policy_job_runnable_state(struct job_record *job_ptr);
+
/* msg_to_slurmd - send given msg_type every slurmd, no args */
extern void msg_to_slurmd (slurm_msg_type_t msg_type);
@@ -1367,6 +1425,10 @@
uint16_t show_flags, uid_t uid,
uint16_t protocol_version);
+/* Pack all scheduling statistics */
+extern void pack_all_stat(int resp, char **buffer_ptr, int *buffer_size,
+ uint16_t protocol_version);
+
/*
* pack_ctld_job_step_info_response_msg - packs job step info
* IN job_id - specific id or zero for all
@@ -1453,6 +1515,13 @@
extern void part_fini (void);
/*
+ * Determine if the specified job can execute right now or is currently
+ * blocked by a partition state or limit. Execute job_limits_check() to
+ * re-validate job state.
+ */
+extern bool part_policy_job_runnable_state(struct job_record *job_ptr);
+
+/*
* partition_in_use - determine whether a partition is in use by a RUNNING
* PENDING or SUSPENDED job
* IN part_name - name of a partition
@@ -1495,9 +1564,9 @@
*/
extern void reset_job_bitmaps (void);
-/* After a node is returned to service, reset the priority of jobs
- * which may have been held due to that node being unavailable */
-extern void reset_job_priority(void);
+/* Reset all scheduling statistics
+ * level IN - clear backfilled_jobs count if set */
+extern void reset_stats(int level);
/*
* restore_node_features - Make node and config (from slurm.conf) fields
@@ -1534,6 +1603,15 @@
*/
extern int send_nodes_to_accounting(time_t event_time);
+/* Set a job's alias_list string */
+extern void set_job_alias_list(struct job_record *job_ptr);
+
+/*
+ * set_job_prio - set a default job priority
+ * IN job_ptr - pointer to the job_record
+ */
+extern void set_job_prio(struct job_record *job_ptr);
+
/*
* set_node_down - make the specified node's state DOWN if possible
* (not in a DRAIN state), kill jobs as needed
diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c
index 3108bca..42c5800 100644
--- a/src/slurmctld/srun_comm.c
+++ b/src/slurmctld/srun_comm.c
@@ -93,6 +93,7 @@
msg_arg = xmalloc(sizeof(resource_allocation_response_msg_t));
msg_arg->job_id = job_ptr->job_id;
msg_arg->node_list = xstrdup(job_ptr->nodes);
+ msg_arg->alias_list = xstrdup(job_ptr->alias_list);
msg_arg->num_cpu_groups = job_resrcs_ptr->cpu_array_cnt;
msg_arg->cpus_per_node = xmalloc(sizeof(uint16_t) *
job_resrcs_ptr->cpu_array_cnt);
@@ -240,6 +241,31 @@
}
/*
+ * srun_step_timeout - notify srun of a job step's imminent timeout
+ * IN step_ptr - pointer to the slurmctld step record
+ * IN timeout_val - when it is going to time out
+ */
+extern void srun_step_timeout(struct step_record *step_ptr, time_t timeout_val)
+{
+ slurm_addr_t *addr;
+ srun_timeout_msg_t *msg_arg;
+
+ xassert(step_ptr);
+
+ if (step_ptr->batch_step || !step_ptr->port
+ || !step_ptr->host || (step_ptr->host[0] == '\0'))
+ return;
+
+ addr = xmalloc(sizeof(struct sockaddr_in));
+ slurm_set_addr(addr, step_ptr->port, step_ptr->host);
+ msg_arg = xmalloc(sizeof(srun_timeout_msg_t));
+ msg_arg->job_id = step_ptr->job_ptr->job_id;
+ msg_arg->step_id = step_ptr->step_id;
+ msg_arg->timeout = timeout_val;
+ _srun_agent_launch(addr, step_ptr->host, SRUN_TIMEOUT, msg_arg);
+}
+
+/*
* srun_timeout - notify srun of a job's imminent timeout
* IN job_ptr - pointer to the slurmctld job record
*/
@@ -267,25 +293,11 @@
step_iterator = list_iterator_create(job_ptr->step_list);
- while ((step_ptr = (struct step_record *) list_next(step_iterator))) {
- if ( (step_ptr->port == 0) ||
- (step_ptr->host == NULL) ||
- (step_ptr->batch_step) ||
- (step_ptr->host[0] == '\0') )
- continue;
- addr = xmalloc(sizeof(struct sockaddr_in));
- slurm_set_addr(addr, step_ptr->port, step_ptr->host);
- msg_arg = xmalloc(sizeof(srun_timeout_msg_t));
- msg_arg->job_id = job_ptr->job_id;
- msg_arg->step_id = step_ptr->step_id;
- msg_arg->timeout = job_ptr->end_time;
- _srun_agent_launch(addr, step_ptr->host, SRUN_TIMEOUT,
- msg_arg);
- }
+ while ((step_ptr = (struct step_record *) list_next(step_iterator)))
+ srun_step_timeout(step_ptr, job_ptr->end_time);
list_iterator_destroy(step_iterator);
}
-
/*
* srun_user_message - Send arbitrary message to an srun job (no job steps)
*/
@@ -449,6 +461,30 @@
}
/*
+ * srun_step_signal - notify srun that a job step should be signalled
+ * NOTE: Needed on BlueGene/Q to signal runjob process
+ * IN step_ptr - pointer to the slurmctld job step record
+ * IN signal - signal number
+ */
+extern void srun_step_signal (struct step_record *step_ptr, uint16_t signal)
+{
+ slurm_addr_t * addr;
+ job_step_kill_msg_t *msg_arg;
+
+ xassert(step_ptr);
+ if (step_ptr->port && step_ptr->host && step_ptr->host[0]) {
+ addr = xmalloc(sizeof(struct sockaddr_in));
+ slurm_set_addr(addr, step_ptr->port, step_ptr->host);
+ msg_arg = xmalloc(sizeof(job_step_kill_msg_t));
+ msg_arg->job_id = step_ptr->job_ptr->job_id;
+ msg_arg->job_step_id = step_ptr->step_id;
+ msg_arg->signal = signal;
+ _srun_agent_launch(addr, step_ptr->host, SRUN_STEP_SIGNAL,
+ msg_arg);
+ }
+}
+
+/*
* srun_exec - request that srun execute a specific command
* and route it's output to stdout
* IN step_ptr - pointer to the slurmctld job step record
diff --git a/src/slurmctld/srun_comm.h b/src/slurmctld/srun_comm.h
index bc5cf3e..1bf5e6d 100644
--- a/src/slurmctld/srun_comm.h
+++ b/src/slurmctld/srun_comm.h
@@ -96,6 +96,14 @@
char *node_list);
/*
+ * srun_step_signal - notify srun that a job step should be signalled
+ * NOTE: Needed on BlueGene/Q to signal runjob process
+ * IN step_ptr - pointer to the slurmctld job step record
+ * IN signal - signal number
+ */
+extern void srun_step_signal (struct step_record *step_ptr, uint16_t signal);
+
+/*
* srun_node_fail - notify srun of a node's failure
* IN job_id - id of job to notify
* IN node_name - name of failed node
@@ -113,6 +121,13 @@
extern void srun_response(uint32_t job_id, uint32_t step_id);
/*
+ * srun_step_timeout - notify srun of a job step's imminent timeout
+ * IN step_ptr - pointer to the slurmctld step record
+ * IN timeout_val - when it is going to time out
+ */
+extern void srun_step_timeout(struct step_record *step_ptr, time_t timeout_val);
+
+/*
* srun_timeout - notify srun of a job's timeout
* IN job_ptr - pointer to the slurmctld job record
*/
diff --git a/src/slurmctld/statistics.c b/src/slurmctld/statistics.c
new file mode 100644
index 0000000..368274f
--- /dev/null
+++ b/src/slurmctld/statistics.c
@@ -0,0 +1,160 @@
+/****************************************************************************\
+ * statistics.c - functions for sdiag command
+ *****************************************************************************
+ * Produced at Barcelona Supercomputing Center, December 2011
+ * Written by Alejandro Lucero <alucero@bsc.es>
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "src/slurmctld/agent.h"
+#include "src/slurmctld/slurmctld.h"
+#include "src/common/pack.h"
+#include "src/common/xstring.h"
+#include "src/common/list.h"
+
+extern int retry_list_size(void);
+
+extern time_t last_proc_req_start;
+
+/* Pack all scheduling statistics */
+extern void pack_all_stat(int resp, char **buffer_ptr, int *buffer_size,
+ uint16_t protocol_version)
+{
+ Buf buffer;
+ int parts_packed;
+ int agent_queue_size;
+ time_t now = time(NULL);
+
+ buffer_ptr[0] = NULL;
+ *buffer_size = 0;
+
+ buffer = init_buf(BUF_SIZE);
+
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ parts_packed = resp;
+ pack32(parts_packed, buffer);
+
+ if (resp) {
+ pack_time(now, buffer);
+ debug("pack_all_stat: time = %u",
+ (uint32_t) last_proc_req_start);
+ pack_time(last_proc_req_start, buffer);
+
+ debug("pack_all_stat: server_thread_count = %u",
+ slurmctld_config.server_thread_count);
+ pack32(slurmctld_config.server_thread_count, buffer);
+
+ agent_queue_size = retry_list_size();
+ pack32(agent_queue_size, buffer);
+
+ pack32(slurmctld_diag_stats.jobs_submitted, buffer);
+ pack32(slurmctld_diag_stats.jobs_started, buffer);
+ pack32(slurmctld_diag_stats.jobs_completed, buffer);
+ pack32(slurmctld_diag_stats.jobs_canceled, buffer);
+ pack32(slurmctld_diag_stats.jobs_failed, buffer);
+
+ pack32(slurmctld_diag_stats.schedule_cycle_max,
+ buffer);
+ pack32(slurmctld_diag_stats.schedule_cycle_last,
+ buffer);
+ pack32(slurmctld_diag_stats.schedule_cycle_sum,
+ buffer);
+ pack32(slurmctld_diag_stats.schedule_cycle_counter,
+ buffer);
+ pack32(slurmctld_diag_stats.schedule_cycle_depth,
+ buffer);
+ pack32(slurmctld_diag_stats.schedule_queue_len, buffer);
+
+ pack32(slurmctld_diag_stats.backfilled_jobs, buffer);
+ pack32(slurmctld_diag_stats.last_backfilled_jobs,
+ buffer);
+ pack32(slurmctld_diag_stats.bf_cycle_counter, buffer);
+ pack32(slurmctld_diag_stats.bf_cycle_sum, buffer);
+ pack32(slurmctld_diag_stats.bf_cycle_last, buffer);
+ pack32(slurmctld_diag_stats.bf_last_depth, buffer);
+ pack32(slurmctld_diag_stats.bf_last_depth_try, buffer);
+
+ pack32(slurmctld_diag_stats.bf_queue_len, buffer);
+ pack32(slurmctld_diag_stats.bf_cycle_max, buffer);
+ pack_time(slurmctld_diag_stats.bf_when_last_cycle,
+ buffer);
+ pack32(slurmctld_diag_stats.bf_depth_sum, buffer);
+ pack32(slurmctld_diag_stats.bf_depth_try_sum, buffer);
+ pack32(slurmctld_diag_stats.bf_queue_len_sum, buffer);
+ pack32(slurmctld_diag_stats.bf_active, buffer);
+ }
+ }
+
+ *buffer_size = get_buf_offset(buffer);
+ buffer_ptr[0] = xfer_buf_data(buffer);
+}
+
+/* Reset all scheduling statistics
+ * level IN - clear backfilled_jobs count if set */
+extern void reset_stats(int level)
+{
+ slurmctld_diag_stats.proc_req_raw = 0;
+ slurmctld_diag_stats.proc_req_threads = 0;
+ slurmctld_diag_stats.schedule_cycle_max = 0;
+ slurmctld_diag_stats.schedule_cycle_sum = 0;
+ slurmctld_diag_stats.schedule_cycle_counter = 0;
+ slurmctld_diag_stats.schedule_cycle_depth = 0;
+ slurmctld_diag_stats.jobs_submitted = 0;
+ slurmctld_diag_stats.jobs_started = 0;
+ slurmctld_diag_stats.jobs_completed = 0;
+ slurmctld_diag_stats.jobs_canceled = 0;
+ slurmctld_diag_stats.jobs_failed = 0;
+
+ /* Just resetting this value when reset requested explicitly */
+ if (level)
+ slurmctld_diag_stats.backfilled_jobs = 0;
+
+ slurmctld_diag_stats.last_backfilled_jobs = 0;
+ slurmctld_diag_stats.bf_cycle_counter = 0;
+ slurmctld_diag_stats.bf_cycle_sum = 0;
+ slurmctld_diag_stats.bf_cycle_last = 0;
+ slurmctld_diag_stats.bf_depth_sum = 0;
+ slurmctld_diag_stats.bf_depth_try_sum = 0;
+ slurmctld_diag_stats.bf_queue_len = 0;
+ slurmctld_diag_stats.bf_queue_len_sum = 0;
+ slurmctld_diag_stats.bf_cycle_max = 0;
+ slurmctld_diag_stats.bf_last_depth = 0;
+ slurmctld_diag_stats.bf_last_depth_try = 0;
+ slurmctld_diag_stats.bf_active = 0;
+}
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index f1aff83..3686d4f 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -293,8 +293,9 @@
debug3(" mem_per_cpu=%u resv_port_cnt=%u immediate=%u no_kill=%u",
step_spec->mem_per_cpu, step_spec->resv_port_cnt,
step_spec->immediate, step_spec->no_kill);
- debug3(" overcommit=%d time_limit=%u gres=%s",
- step_spec->overcommit, step_spec->time_limit, step_spec->gres);
+ debug3(" overcommit=%d time_limit=%u gres=%s constraints=%s",
+ step_spec->overcommit, step_spec->time_limit, step_spec->gres,
+ step_spec->features);
}
@@ -340,6 +341,7 @@
{
struct job_record *job_ptr;
struct step_record *step_ptr;
+ int rc = SLURM_SUCCESS;
job_ptr = find_job_record(job_id);
if (job_ptr == NULL) {
@@ -347,9 +349,11 @@
return ESLURM_INVALID_JOB_ID;
}
- if (IS_JOB_FINISHED(job_ptr))
- return ESLURM_ALREADY_DONE;
- if (!IS_JOB_RUNNING(job_ptr)) {
+ if (IS_JOB_FINISHED(job_ptr)) {
+ rc = ESLURM_ALREADY_DONE;
+ if (signal != SIG_NODE_FAIL)
+ return rc;
+ } else if (!IS_JOB_RUNNING(job_ptr)) {
verbose("job_step_signal: step %u.%u can not be sent signal "
"%u from state=%s", job_id, step_id, signal,
job_state_string(job_ptr->job_state));
@@ -369,6 +373,20 @@
return ESLURM_INVALID_JOB_ID;
}
+ /* If SIG_NODE_FAIL comes through it means we had nodes fail
+ so handle that in the select plugin and switch the signal
+ to KILL afterwards.
+ */
+ if (signal == SIG_NODE_FAIL) {
+ select_g_fail_cnode(step_ptr);
+ signal = SIGKILL;
+ if (rc != SLURM_SUCCESS)
+ return rc;
+ }
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ srun_step_signal(step_ptr, signal);
+#endif
+
/* save user ID of the one who requested the job be cancelled */
if (signal == SIGKILL) {
step_ptr->requid = uid;
@@ -688,6 +706,17 @@
if (nodes_avail == NULL)
fatal("bit_copy malloc failure");
bit_and (nodes_avail, up_node_bitmap);
+ if (step_spec->features) {
+ /* We only select for a single feature name here.
+ * Add support for AND, OR, etc. here if desired */
+ struct features_record *feat_ptr;
+ feat_ptr = list_find_first(feature_list, list_find_feature,
+ (void *) step_spec->features);
+ if (feat_ptr && feat_ptr->node_bitmap)
+ bit_and(nodes_avail, feat_ptr->node_bitmap);
+ else
+ bit_nclear(nodes_avail, 0, (bit_size(nodes_avail)-1));
+ }
if (step_spec->mem_per_cpu &&
((job_resrcs_ptr->memory_allocated == NULL) ||
@@ -708,6 +737,7 @@
continue;
node_ptr = node_record_table_ptr + i;
if (IS_NODE_POWER_SAVE(node_ptr) ||
+ IS_NODE_FUTURE(node_ptr) ||
IS_NODE_NO_RESPOND(node_ptr)) {
/* Node is/was powered down. Need to wait
* for it to start responding again. */
@@ -784,7 +814,7 @@
avail_tasks = step_spec->num_tasks;
total_tasks = step_spec->num_tasks;
}
- if (step_spec->mem_per_cpu) {
+ if (step_spec->mem_per_cpu && _is_mem_resv()) {
avail_mem = job_resrcs_ptr->
memory_allocated[node_inx] -
job_resrcs_ptr->memory_used[node_inx];
@@ -806,7 +836,7 @@
node_inx, false,
job_ptr->job_id,
NO_VAL);
- if (cpus_per_task > 0)
+ if ((gres_cnt != NO_VAL) && (cpus_per_task > 0))
gres_cnt /= cpus_per_task;
avail_tasks = MIN(avail_tasks, gres_cnt);
gres_cnt = gres_plugin_step_test(step_gres_list,
@@ -814,10 +844,9 @@
node_inx, true,
job_ptr->job_id,
NO_VAL);
- if (cpus_per_task > 0)
+ if ((gres_cnt != NO_VAL) && (cpus_per_task > 0))
gres_cnt /= cpus_per_task;
total_tasks = MIN(total_tasks, gres_cnt);
-
if (step_spec->plane_size != (uint16_t) NO_VAL) {
if (avail_tasks < step_spec->plane_size)
avail_tasks = 0;
@@ -974,10 +1003,22 @@
goto cleanup;
}
if (!bit_super_set(selected_nodes, nodes_avail)) {
- *return_code = ESLURM_INVALID_TASK_MEMORY;
- info ("_pick_step_nodes: requested nodes %s "
- "have inadequate memory",
- step_spec->node_list);
+ /*
+ * If some nodes still have some memory allocated
+ * to other steps, just defer the execution of the
+ * step
+ */
+ if (mem_blocked_nodes == 0) {
+ *return_code = ESLURM_INVALID_TASK_MEMORY;
+ info ("_pick_step_nodes: requested nodes %s "
+ "have inadequate memory",
+ step_spec->node_list);
+ } else {
+ *return_code = ESLURM_NODES_BUSY;
+ info ("_pick_step_nodes: some requested nodes"
+ " %s still have memory used by other steps",
+ step_spec->node_list);
+ }
FREE_NULL_BITMAP(selected_nodes);
goto cleanup;
}
@@ -1121,7 +1162,7 @@
}
if (step_spec->min_nodes) {
- int cpus_needed, node_avail_cnt, node_idle_cnt, nodes_needed;
+ int cpus_needed, node_avail_cnt, nodes_needed;
if (usable_cpu_cnt == NULL) {
usable_cpu_cnt = xmalloc(sizeof(uint32_t) *
@@ -1171,10 +1212,6 @@
nodes_needed = 0;
}
}
- if (nodes_idle)
- node_idle_cnt = bit_set_count(nodes_idle);
- else
- node_idle_cnt = 0;
if (nodes_avail)
node_avail_cnt = bit_set_count(nodes_avail);
else
@@ -1194,10 +1231,9 @@
usable_cpu_cnt);
if (node_tmp == NULL) {
int avail_node_cnt = bit_set_count(nodes_avail);
- if ((avail_node_cnt < node_idle_cnt) &&
- (step_spec->min_nodes <=
- (avail_node_cnt + nodes_picked_cnt +
- mem_blocked_nodes))) {
+ if (step_spec->min_nodes <=
+ (avail_node_cnt + nodes_picked_cnt +
+ mem_blocked_nodes)) {
*return_code = ESLURM_NODES_BUSY;
} else if (!bit_super_set(job_ptr->node_bitmap,
up_node_bitmap)) {
@@ -1684,6 +1720,9 @@
uint32_t orig_cpu_count;
List step_gres_list = (List) NULL;
dynamic_plugin_data_t *select_jobinfo = NULL;
+#ifdef HAVE_CRAY
+ uint32_t resv_id = 0;
+#endif
#if defined HAVE_BG
static uint16_t cpus_per_mp = (uint16_t)NO_VAL;
#else
@@ -1748,6 +1787,11 @@
if (job_ptr->next_step_id >= slurmctld_conf.max_step_cnt)
return ESLURM_STEP_LIMIT;
+#ifdef HAVE_CRAY
+ select_g_select_jobinfo_get(job_ptr->select_jobinfo,
+ SELECT_JOBDATA_RESV_ID, &resv_id);
+#endif
+
#if defined HAVE_BG
select_g_select_jobinfo_get(job_ptr->select_jobinfo,
SELECT_JOBDATA_NODE_CNT,
@@ -1755,13 +1799,6 @@
#if defined HAVE_BGQ
if (step_specs->min_nodes < node_count) {
- if (step_specs->min_nodes > 512) {
- error("step asked for more than 512 nodes but "
- "less than the allocation, on a "
- "bluegene/Q system that isn't allowed.");
- return ESLURM_INVALID_NODE_COUNT;
- }
- /* We are asking for less than we have. */
node_count = step_specs->min_nodes;
step_specs->min_nodes = 1;
@@ -1842,7 +1879,6 @@
/* make sure this exists since we need it so we don't core on
* a xassert */
select_jobinfo = select_g_select_jobinfo_alloc();
-
nodeset = _pick_step_nodes(job_ptr, step_specs, step_gres_list,
cpus_per_task, node_count, select_jobinfo,
&ret_code);
@@ -1852,6 +1888,10 @@
select_g_select_jobinfo_free(select_jobinfo);
return ret_code;
}
+#ifdef HAVE_CRAY
+ select_g_select_jobinfo_set(select_jobinfo,
+ SELECT_JOBDATA_RESV_ID, &resv_id);
+#endif
#ifdef HAVE_BGQ
+ /* Things might have changed here since sometimes users ask for
the wrong size in cnodes to make a block.
@@ -2047,14 +2087,17 @@
uint16_t plane_size)
{
uint16_t cpus_per_node[node_count];
- uint32_t cpu_count_reps[node_count], gres_cpus;
+ uint32_t cpu_count_reps[node_count];
+ struct job_record *job_ptr = step_ptr->job_ptr;
+ job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs;
+#ifndef HAVE_BGQ
+ uint32_t gres_cpus;
int cpu_inx = -1;
int i, usable_cpus, usable_mem;
int set_nodes = 0/* , set_tasks = 0 */;
int pos = -1, job_node_offset = -1;
int first_bit, last_bit;
- struct job_record *job_ptr = step_ptr->job_ptr;
- job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs;
+#endif
xassert(job_resrcs_ptr);
xassert(job_resrcs_ptr->cpus);
@@ -2067,7 +2110,16 @@
"to enforce memory limits for job %u", job_ptr->job_id);
step_ptr->mem_per_cpu = 0;
}
+#ifdef HAVE_BGQ
+ /* Since we have to deal with a conversion between cnodes and
+ midplanes here the math is really easy, and already has
+ been figured out for us in the plugin, so just copy the
+ numbers.
+ */
+ memcpy(cpus_per_node, job_resrcs_ptr->cpus, sizeof(cpus_per_node));
+ cpu_count_reps[0] = job_resrcs_ptr->ncpus;
+#else
/* build the cpus-per-node arrays for the subset of nodes
* used by this job step */
first_bit = bit_ffs(job_ptr->node_bitmap);
@@ -2144,7 +2196,7 @@
break;
}
}
-
+#endif
/* if (set_tasks < num_tasks) { */
/* error("Resources only available for %u of %u tasks", */
/* set_tasks, num_tasks); */
@@ -3351,6 +3403,12 @@
static void _signal_step_timelimit(struct job_record *job_ptr,
struct step_record *step_ptr, time_t now)
{
+
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ srun_step_timeout(step_ptr, now);
+ return;
+#endif
+
#ifndef HAVE_FRONT_END
int i;
#endif
diff --git a/src/slurmctld/trigger_mgr.c b/src/slurmctld/trigger_mgr.c
index aa31de0..9dc4ca1 100644
--- a/src/slurmctld/trigger_mgr.c
+++ b/src/slurmctld/trigger_mgr.c
@@ -45,6 +45,10 @@
# include <pthread.h>
#endif
+#if defined(__NetBSD__)
+#include <sys/types.h> /* for pid_t */
+#include <sys/signal.h> /* for SIGKILL */
+#endif
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
@@ -68,7 +72,8 @@
#define MAX_PROG_TIME 300 /* maximum run time for program */
/* Change TRIGGER_STATE_VERSION value when changing the state save format */
-#define TRIGGER_STATE_VERSION "VER003"
+#define TRIGGER_STATE_VERSION "VER004"
+#define TRIGGER_2_4_STATE_VERSION "VER004" /* SLURM version 2.4 */
#define TRIGGER_2_2_STATE_VERSION "VER003" /* SLURM version 2.2 */
#define TRIGGER_2_1_STATE_VERSION "VER002" /* SLURM version 2.1 */
@@ -102,6 +107,8 @@
uint8_t dbd_failure = 0;
typedef struct trig_mgr_info {
+ uint32_t child_pid; /* pid of child process */
+ uint16_t flags; /* TRIGGER_FLAG_* */
uint32_t trig_id; /* trigger ID */
uint16_t res_type; /* TRIGGER_RES_TYPE_* */
char * res_id; /* node name or job_id (string) */
@@ -111,17 +118,24 @@
uint32_t trig_type; /* TRIGGER_TYPE_* */
time_t trig_time; /* offset (pending) or time stamp (complete) */
uint32_t user_id; /* user requesting trigger */
- uint32_t group_id; /* user's group id (pending) or pid (complete) */
+ uint32_t group_id; /* user's group id */
char * program; /* program to execute */
uint8_t state; /* 0=pending, 1=pulled, 2=completed */
+
+ /* The orig_ fields are used to save and clone the original values */
+ bitstr_t *orig_bitmap; /* bitmap of requested nodes (if applicable) */
+ char * orig_res_id; /* original node name or job_id (string) */
+ time_t orig_time; /* offset (pending) or time stamp (complete) */
} trig_mgr_info_t;
/* Prototype for ListDelF */
void _trig_del(void *x) {
trig_mgr_info_t * tmp = (trig_mgr_info_t *) x;
xfree(tmp->res_id);
+ xfree(tmp->orig_res_id);
xfree(tmp->program);
FREE_NULL_BITMAP(tmp->nodes_bitmap);
+ FREE_NULL_BITMAP(tmp->orig_bitmap);
xfree(tmp);
}
@@ -284,7 +298,7 @@
rc = ESLURM_INVALID_JOB_ID;
goto fini;
}
- } else if ((trig_in->trig_id == 0) && (trig_in->user_id == 0)) {
+ } else if ((trig_in->trig_id == 0) && (trig_in->user_id == NO_VAL)) {
rc = EINVAL;
goto fini;
}
@@ -292,18 +306,20 @@
/* now look for a valid request, matching uid */
trig_iter = list_iterator_create(trigger_list);
while ((trig_test = list_next(trig_iter))) {
- if ((trig_test->user_id != (uint32_t) uid) && (uid != 0))
- continue;
if (trig_in->trig_id &&
(trig_in->trig_id != trig_test->trig_id))
continue;
if (job_id && (job_id != trig_test->job_id))
continue;
- if (trig_in->user_id &&
+ if ((trig_in->user_id != NO_VAL) &&
(trig_in->user_id != trig_test->user_id))
continue;
if (trig_test->state == 2) /* wait for proc termination */
continue;
+ if ((trig_test->user_id != (uint32_t) uid) && (uid != 0)) {
+ rc = ESLURM_ACCESS_DENIED;
+ continue;
+ }
list_delete_item(trig_iter);
rc = SLURM_SUCCESS;
}
@@ -335,8 +351,10 @@
trig_out = resp_data->trigger_array;
while ((trig_in = list_next(trig_iter))) {
/* Note: Filtering currently done by strigger */
- if (trig_in->state >= 1)
+ if ((trig_in->state >= 1) &&
+ ((trig_out->flags & TRIGGER_FLAG_PERM) == 0))
continue; /* no longer pending */
+ trig_out->flags = trig_in->flags;
trig_out->trig_id = trig_in->trig_id;
trig_out->res_type = trig_in->res_type;
trig_out->res_id = xstrdup(trig_in->res_id);
@@ -420,15 +438,22 @@
msg->trigger_array[i].trig_id = next_trigger_id;
trig_add->trig_id = next_trigger_id;
next_trigger_id++;
+ trig_add->flags = msg->trigger_array[i].flags;
trig_add->res_type = msg->trigger_array[i].res_type;
- trig_add->nodes_bitmap = bitmap;
+ if (bitmap) {
+ trig_add->nodes_bitmap = bitmap;
+ trig_add->orig_bitmap = bit_copy(bitmap);
+ }
trig_add->job_id = job_id;
trig_add->job_ptr = job_ptr;
- /* move don't copy "res_id" */
- trig_add->res_id = msg->trigger_array[i].res_id;
- msg->trigger_array[i].res_id = NULL;
+ if (msg->trigger_array[i].res_id) {
+ trig_add->res_id = msg->trigger_array[i].res_id;
+ trig_add->orig_res_id = xstrdup(trig_add->res_id);
+ msg->trigger_array[i].res_id = NULL; /* moved */
+ }
trig_add->trig_type = msg->trigger_array[i].trig_type;
trig_add->trig_time = msg->trigger_array[i].offset;
+ trig_add->orig_time = msg->trigger_array[i].offset;
trig_add->user_id = (uint32_t) uid;
trig_add->group_id = (uint32_t) gid;
/* move don't copy "program" */
@@ -631,14 +656,15 @@
safe_pack8(dbd_failure, buffer);
safe_pack8(db_failure, buffer);
+ pack16 (trig_ptr->flags, buffer);
pack32 (trig_ptr->trig_id, buffer);
pack16 (trig_ptr->res_type, buffer);
- packstr (trig_ptr->res_id, buffer);
+ packstr (trig_ptr->orig_res_id, buffer); /* restores res_id too */
/* rebuild nodes_bitmap as needed from res_id */
/* rebuild job_id as needed from res_id */
/* rebuild job_ptr as needed from res_id */
pack32 (trig_ptr->trig_type, buffer);
- pack_time(trig_ptr->trig_time, buffer);
+ pack_time(trig_ptr->orig_time, buffer); /* restores trig_time too */
pack32 (trig_ptr->user_id, buffer);
pack32 (trig_ptr->group_id, buffer);
packstr (trig_ptr->program, buffer);
@@ -651,7 +677,28 @@
uint32_t str_len;
trig_ptr = xmalloc(sizeof(trig_mgr_info_t));
- if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
+
+ if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
+ /* restore trigger pull state flags */
+ safe_unpack8(&ctld_failure, buffer);
+ safe_unpack8(&bu_ctld_failure, buffer);
+ safe_unpack8(&dbd_failure, buffer);
+ safe_unpack8(&db_failure, buffer);
+
+ safe_unpack16 (&trig_ptr->flags, buffer);
+ safe_unpack32 (&trig_ptr->trig_id, buffer);
+ safe_unpack16 (&trig_ptr->res_type, buffer);
+ safe_unpackstr_xmalloc(&trig_ptr->res_id, &str_len, buffer);
+ /* rebuild nodes_bitmap as needed from res_id */
+ /* rebuild job_id as needed from res_id */
+ /* rebuild job_ptr as needed from res_id */
+ safe_unpack32 (&trig_ptr->trig_type, buffer);
+ safe_unpack_time(&trig_ptr->trig_time, buffer);
+ safe_unpack32 (&trig_ptr->user_id, buffer);
+ safe_unpack32 (&trig_ptr->group_id, buffer);
+ safe_unpackstr_xmalloc(&trig_ptr->program, &str_len, buffer);
+ safe_unpack8 (&trig_ptr->state, buffer);
+ } else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) {
/* restore trigger pull state flags */
safe_unpack8(&ctld_failure, buffer);
safe_unpack8(&bu_ctld_failure, buffer);
@@ -707,7 +754,12 @@
&trig_ptr->nodes_bitmap) != 0))
goto unpack_error;
}
-
+ if (trig_ptr->nodes_bitmap)
+ trig_ptr->orig_bitmap = bit_copy(trig_ptr->nodes_bitmap);
+ if (trig_ptr->res_id)
+ trig_ptr->orig_res_id = xstrdup(trig_ptr->res_id);
+ trig_ptr->orig_time = trig_ptr->trig_time;
+
slurm_mutex_lock(&trigger_mutex);
if (trigger_list == NULL)
trigger_list = list_create(_trig_del);
@@ -890,6 +942,8 @@
if (ver_str) {
if (!strcmp(ver_str, TRIGGER_STATE_VERSION)) {
protocol_version = SLURM_PROTOCOL_VERSION;
+ } else if (!strcmp(ver_str, TRIGGER_2_2_STATE_VERSION)) {
+ protocol_version = SLURM_2_2_PROTOCOL_VERSION;
} else if (!strcmp(ver_str, TRIGGER_2_1_STATE_VERSION)) {
protocol_version = SLURM_2_1_PROTOCOL_VERSION;
}
@@ -1421,7 +1475,7 @@
char *pname, *uname;
uid_t uid;
gid_t gid;
- pid_t child;
+ pid_t child_pid;
if (!_validate_trigger(trig_in))
return;
@@ -1439,10 +1493,10 @@
snprintf(user_name, sizeof(user_name), "%s", uname);
xfree(uname);
- child = fork();
- if (child > 0) {
- trig_in->group_id = child;
- } else if (child == 0) {
+ child_pid = fork();
+ if (child_pid > 0) {
+ trig_in->child_pid = child_pid;
+ } else if (child_pid == 0) {
int i;
bool run_as_self = (uid == getuid());
@@ -1509,6 +1563,34 @@
trigger_pri_db_res_op = false;
}
+/* Make a copy of a trigger and pre-pend it on our list */
+static void _trigger_clone(trig_mgr_info_t *trig_in)
+{
+ trig_mgr_info_t *trig_add;
+
+ trig_add = xmalloc(sizeof(trig_mgr_info_t));
+ trig_add->flags = trig_in->flags;
+ trig_add->trig_id = trig_in->trig_id;
+ trig_add->res_type = trig_in->res_type;
+ if (trig_in->orig_res_id) {
+ trig_add->res_id = xstrdup(trig_in->orig_res_id);
+ trig_add->orig_res_id = xstrdup(trig_in->orig_res_id);
+ }
+ if (trig_in->orig_bitmap) {
+ trig_add->nodes_bitmap = bit_copy(trig_in->orig_bitmap);
+ trig_add->orig_bitmap = bit_copy(trig_in->orig_bitmap);
+ }
+ trig_add->job_id = trig_in->job_id;
+ trig_add->job_ptr = trig_in->job_ptr;
+ trig_add->trig_type = trig_in->trig_type;
+ trig_add->trig_time = trig_in->orig_time;
+ trig_add->orig_time = trig_in->orig_time;
+ trig_add->user_id = trig_in->user_id;
+ trig_add->group_id = trig_in->group_id;
+ trig_add->program = xstrdup(trig_in->program);;
+ list_prepend(trigger_list, trig_add);
+}
+
extern void trigger_process(void)
{
ListIterator trig_iter;
@@ -1551,6 +1633,9 @@
trig_in->user_id, trig_in->group_id,
trig_in->program, trig_in->res_id);
}
+ if (trig_in->flags & TRIGGER_FLAG_PERM) {
+ _trigger_clone(trig_in);
+ }
trig_in->state = 2;
trig_in->trig_time = now;
state_change = true;
@@ -1558,9 +1643,9 @@
} else if ((trig_in->state == 2) &&
(difftime(now, trig_in->trig_time) >
MAX_PROG_TIME)) {
- if (trig_in->group_id != 0) {
- killpg(trig_in->group_id, SIGKILL);
- rc = waitpid(trig_in->group_id, &prog_stat,
+ if (trig_in->child_pid != 0) {
+ killpg(trig_in->child_pid, SIGKILL);
+ rc = waitpid(trig_in->child_pid, &prog_stat,
WNOHANG);
if ((rc > 0) && prog_stat) {
info("trigger uid=%u type=%s:%s "
@@ -1571,12 +1656,12 @@
WIFEXITED(prog_stat),
WTERMSIG(prog_stat));
}
- if ((rc == trig_in->group_id) ||
+ if ((rc == trig_in->child_pid) ||
((rc == -1) && (errno == ECHILD)))
- trig_in->group_id = 0;
+ trig_in->child_pid = 0;
}
- if (trig_in->group_id == 0) {
+ if (trig_in->child_pid == 0) {
if (slurm_get_debug_flags() &
DEBUG_FLAG_TRIGGERS) {
info("purging trigger[%u]",
@@ -1588,7 +1673,7 @@
} else if (trig_in->state == 2) {
/* Elimiate zombie processes right away.
* Purge trigger entry above MAX_PROG_TIME later */
- rc = waitpid(trig_in->group_id, &prog_stat, WNOHANG);
+ rc = waitpid(trig_in->child_pid, &prog_stat, WNOHANG);
if ((rc > 0) && prog_stat) {
info("trigger uid=%u type=%s:%s exit=%u:%u",
trig_in->user_id,
@@ -1597,9 +1682,9 @@
WIFEXITED(prog_stat),
WTERMSIG(prog_stat));
}
- if ((rc == trig_in->group_id) ||
+ if ((rc == trig_in->child_pid) ||
((rc == -1) && (errno == ECHILD)))
- trig_in->group_id = 0;
+ trig_in->child_pid = 0;
}
}
list_iterator_destroy(trig_iter);
diff --git a/src/slurmd/Makefile.in b/src/slurmd/Makefile.in
index b399b89..a31667a 100644
--- a/src/slurmd/Makefile.in
+++ b/src/slurmd/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -201,6 +201,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -237,6 +238,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -573,10 +575,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/slurmd/common/Makefile.in b/src/slurmd/common/Makefile.in
index 169390c..55501b4 100644
--- a/src/slurmd/common/Makefile.in
+++ b/src/slurmd/common/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -184,6 +184,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -220,6 +221,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -365,7 +367,7 @@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libslurmd_common.la: $(libslurmd_common_la_OBJECTS) $(libslurmd_common_la_DEPENDENCIES)
+libslurmd_common.la: $(libslurmd_common_la_OBJECTS) $(libslurmd_common_la_DEPENDENCIES) $(EXTRA_libslurmd_common_la_DEPENDENCIES)
$(LINK) $(libslurmd_common_la_OBJECTS) $(libslurmd_common_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -504,10 +506,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/slurmd/common/run_script.c b/src/slurmd/common/run_script.c
index 6fc3309..61452b6 100644
--- a/src/slurmd/common/run_script.c
+++ b/src/slurmd/common/run_script.c
@@ -40,17 +40,64 @@
# include "config.h"
#endif
+#if defined(__NetBSD__)
+#include <sys/types.h> /* for pid_t */
+#include <sys/signal.h> /* for SIGKILL */
+#endif
+#include <poll.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <sys/errno.h>
#include <string.h>
+#include <glob.h>
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/common/xassert.h"
+#include "src/common/list.h"
#include "src/slurmd/common/run_script.h"
+/*
+ * Same as waitpid(2) but kill process group for pid after timeout secs.
+ * Returns 0 for valid status in pstatus, -1 on failure of waitpid(2).
+ */
+int waitpid_timeout (const char *name, pid_t pid, int *pstatus, int timeout)
+{
+ int timeout_ms = 1000 * timeout; /* timeout in ms */
+ int max_delay = 1000; /* max delay between waitpid calls */
+ int delay = 10; /* initial delay */
+ int rc;
+ int options = WNOHANG;
+
+ if (timeout <= 0)
+ options = 0;
+
+ while ((rc = waitpid (pid, pstatus, options)) <= 0) {
+ if (rc < 0) {
+ if (errno == EINTR)
+ continue;
+ error("waidpid: %m");
+ return (-1);
+ }
+ else if (timeout_ms <= 0) {
+ info ("%s%stimeout after %ds: killing pgid %d",
+ name != NULL ? name : "",
+ name != NULL ? ": " : "",
+ timeout, pid);
+ killpg(pid, SIGKILL);
+ options = 0;
+ }
+ else {
+ poll(NULL, 0, delay);
+ timeout_ms -= delay;
+ delay = MIN (timeout_ms, MIN(max_delay, delay*2));
+ }
+ }
+
+ killpg(pid, SIGKILL); /* kill children too */
+ return (0);
+}
/*
* Run a prolog or epilog script (does NOT drop privileges)
@@ -62,11 +109,11 @@
* if NULL
* RET 0 on success, -1 on failure.
*/
-int
-run_script(const char *name, const char *path, uint32_t jobid,
+static int
+run_one_script(const char *name, const char *path, uint32_t jobid,
int max_wait, char **env)
{
- int status, rc, opt;
+ int status;
pid_t cpid;
xassert(env);
@@ -104,29 +151,88 @@
exit(127);
}
- if (max_wait < 0)
- opt = 0;
- else
- opt = WNOHANG;
+ if (waitpid_timeout(name, cpid, &status, max_wait) < 0)
+ return (-1);
+ return status;
+}
- while (1) {
- rc = waitpid(cpid, &status, opt);
- if (rc < 0) {
- if (errno == EINTR)
- continue;
- error("waidpid: %m");
- return 0;
- } else if (rc == 0) {
- sleep(1);
- if ((--max_wait) == 0) {
- killpg(cpid, SIGKILL);
- opt = 0;
- }
- } else {
- killpg(cpid, SIGKILL); /* kill children too */
- return status;
- }
+static void _xfree_f (void *x)
+{
+ xfree (x);
+}
+
+
+static int _ef (const char *p, int errnum)
+{
+ return error ("run_script: glob: %s: %s", p, strerror (errno));
+}
+
+static List _script_list_create (const char *pattern)
+{
+ glob_t gl;
+ size_t i;
+ List l = NULL;
+
+ if (pattern == NULL)
+ return (NULL);
+
+ int rc = glob (pattern, GLOB_ERR, _ef, &gl);
+ switch (rc) {
+ case 0:
+ l = list_create ((ListDelF) _xfree_f);
+ if (l == NULL)
+ fatal("run_script: list_create: Out of memory");
+ for (i = 0; i < gl.gl_pathc; i++)
+ list_push (l, xstrdup (gl.gl_pathv[i]));
+ break;
+ case GLOB_NOMATCH:
+ break;
+ case GLOB_NOSPACE:
+ error ("run_script: glob(3): Out of memory");
+ break;
+ case GLOB_ABORTED:
+ error ("run_script: cannot read dir %s: %m", pattern);
+ break;
+ default:
+ error ("Unknown glob(3) return code = %d", rc);
+ break;
}
- /* NOTREACHED */
+ globfree (&gl);
+
+ return l;
}
+
+int run_script(const char *name, const char *pattern, uint32_t jobid,
+ int max_wait, char **env)
+{
+ int rc = 0;
+ List l;
+ ListIterator i;
+ char *s;
+
+ if (pattern == NULL || pattern[0] == '\0')
+ return 0;
+
+ l = _script_list_create (pattern);
+ if (l == NULL)
+ return error ("Unable to run %s [%s]", name, pattern);
+
+ i = list_iterator_create (l);
+ if (i == NULL)
+ fatal ("run_script: list_iterator_create: Out of memory");
+
+ while ((s = list_next (i))) {
+ rc = run_one_script (name, s, jobid, max_wait, env);
+ if (rc) {
+ error ("%s: exited with status 0x%04x\n", s, rc);
+ break;
+ }
+
+ }
+ list_iterator_destroy (i);
+ list_destroy (l);
+
+ return rc;
+}
+
diff --git a/src/slurmd/common/run_script.h b/src/slurmd/common/run_script.h
index 8246341..6354870 100644
--- a/src/slurmd/common/run_script.h
+++ b/src/slurmd/common/run_script.h
@@ -44,6 +44,17 @@
#include <inttypes.h>
/*
+ * Same as waitpid(2) but kill process group for pid after timeout secs.
+ * name IN: name or class of program we're waiting on (for log messages)
+ * pid IN: child on which to call waitpid(2)
+ * pstatus IN: pointer to integer status
+ * timeout IN: timeout in seconds
+ *
+ * Returns 0 for valid status in pstatus, -1 on failure of waitpid(2).
+ */
+int waitpid_timeout (const char *name, pid_t pid, int *pstatus, int timeout);
+
+/*
* Run a prolog or epilog script (does NOT drop privileges)
* name IN: class of program (prolog, epilog, etc.),
* path IN: pathname of program to run
diff --git a/src/slurmd/common/set_oomadj.c b/src/slurmd/common/set_oomadj.c
index ef123af..e89ea4b 100644
--- a/src/slurmd/common/set_oomadj.c
+++ b/src/slurmd/common/set_oomadj.c
@@ -65,6 +65,12 @@
oom_adj_file);
return -1;
}
+ /* Convert range from [-1000,1000] to [-17,15]
+ * for use with older Linux kernel before 2.6.36 */
+ if (adj < 0)
+ adj = (adj * 17) / 1000;
+ else if (adj > 0)
+ adj = (adj * 15) / 1000;
} else {
error("failed to open %s: %m", oom_adj_file);
return -1;
diff --git a/src/slurmd/common/setproctitle.c b/src/slurmd/common/setproctitle.c
index 91a5e8e..8d5c07b 100644
--- a/src/slurmd/common/setproctitle.c
+++ b/src/slurmd/common/setproctitle.c
@@ -85,6 +85,10 @@
# endif
#endif
+#if defined(__NetBSD__)
+#include <stdlib.h>
+#include <string.h>
+#endif
#ifndef HAVE_SETPROCTITLE
#include <stdlib.h>
#include <stdio.h>
@@ -260,7 +264,11 @@
save_argc = argc;
save_argv = argv;
+#if defined(__NetBSD__)
+ setprogname (argv[0]);
+#else
_init__progname (argv[0]);
+#endif
#if SETPROCTITLE_STRATEGY == PS_USE_CLOBBER_ARGV
/*
diff --git a/src/slurmd/common/slurmstepd_init.c b/src/slurmd/common/slurmstepd_init.c
index e33b7b2..0a07da3 100644
--- a/src/slurmd/common/slurmstepd_init.c
+++ b/src/slurmd/common/slurmstepd_init.c
@@ -46,6 +46,9 @@
pack16(conf->cores, buffer);
pack16(conf->threads, buffer);
pack32(conf->real_memory_size, buffer);
+ pack16(conf->block_map_size, buffer);
+ pack16_array(conf->block_map, conf->block_map_size, buffer);
+ pack16_array(conf->block_map_inv, conf->block_map_size, buffer);
packstr(conf->spooldir, buffer);
packstr(conf->node_name, buffer);
packstr(conf->logfile, buffer);
@@ -72,6 +75,9 @@
safe_unpack16(&conf->cores, buffer);
safe_unpack16(&conf->threads, buffer);
safe_unpack32(&conf->real_memory_size, buffer);
+ safe_unpack16(&conf->block_map_size, buffer);
+ safe_unpack16_array(&conf->block_map, &uint32_tmp, buffer);
+ safe_unpack16_array(&conf->block_map_inv, &uint32_tmp, buffer);
safe_unpackstr_xmalloc(&conf->spooldir, &uint32_tmp, buffer);
safe_unpackstr_xmalloc(&conf->node_name, &uint32_tmp, buffer);
safe_unpackstr_xmalloc(&conf->logfile, &uint32_tmp, buffer);
diff --git a/src/slurmd/private.key b/src/slurmd/private.key
deleted file mode 100644
index f68deef..0000000
--- a/src/slurmd/private.key
+++ /dev/null
@@ -1,15 +0,0 @@
------BEGIN RSA PRIVATE KEY-----
-MIICWwIBAAKBgQCmRolP9oC50gM8+Lnfi7NLvLLBo7tQrf1qIrijfbeyR8WB1MJq
-7b025CU6Dm4RXmeYSDj7TUOj9rZxHg9z92B+LVc341sL87HAOzzOLIoYfJ/PEStU
-v8t5ZCAumb5PIzit5mvZFU5HnUbhGN9uyo2AZrlgPgDXgMScioBI/ZKZfwIDAQAB
-AoGAEud0FxZEflvlKFpy73p3kQXMIP3Xfw25AQcfjN7vMqK8Ts18qSGMYKBBpz8a
-vnjIOGUFrXSUWyDijyvGv/2BB8h+1zSn5GImYWC0dV0MWx6BLYhvHWU5KsLhOYPj
-OoCG+Uv7dwIj4rURaKHLb88+a6jd2tfAvpam1ohaOgcYZbECQQDRMwJWeYGsFrqu
-x1X+/6fpYeGIusTj4lnh+O3HLnYLAvsVg+VUvYy5fky+gM4c641sbTi0icR1ngla
-bbMTaftXAkEAy3lBSJvTqqIDNZujfFU2uK6EYIcs3NGoQDif68oSNKREmQOcSnJl
-t+eUnvIJRj34owubKhof8s+KUSIRYrSiGQJAG9DWfL0HOuSCyf6DNrazktqK7G05
-BLvyC5PXueY5l9k7nW6+LXhOn9NlTOc/gKTpVwNmyMqQrrrmr3lqmCg9XQI/ZPDC
-ckLbvsqgzPdpI0Ui4HuzMC/fEazwyutMaqzUW0kivsYLwuL9kz5PfIqWD0KvFjOr
-xjcvokVTWOyNs2npAkEAh/XG80JXxdML7VyjiQIVc8W79Rjf2yEwBWCOz8OeLUBV
-NpRtdnHhbZvpd13oTnq8zRGY1xdAX7qkl4RfxGOx/g==
------END RSA PRIVATE KEY-----
diff --git a/src/slurmd/public.cert b/src/slurmd/public.cert
deleted file mode 100644
index e07f489..0000000
--- a/src/slurmd/public.cert
+++ /dev/null
@@ -1,6 +0,0 @@
------BEGIN PUBLIC KEY-----
-MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCmRolP9oC50gM8+Lnfi7NLvLLB
-o7tQrf1qIrijfbeyR8WB1MJq7b025CU6Dm4RXmeYSDj7TUOj9rZxHg9z92B+LVc3
-41sL87HAOzzOLIoYfJ/PEStUv8t5ZCAumb5PIzit5mvZFU5HnUbhGN9uyo2AZrlg
-PgDXgMScioBI/ZKZfwIDAQAB
------END PUBLIC KEY-----
diff --git a/src/slurmd/slurmd/Makefile.am b/src/slurmd/slurmd/Makefile.am
index d89bdd8..963be2b 100644
--- a/src/slurmd/slurmd/Makefile.am
+++ b/src/slurmd/slurmd/Makefile.am
@@ -6,12 +6,12 @@
sbin_PROGRAMS = slurmd
AM_CPPFLAGS = -DLIBSLURM_SO=\"$(libdir)/libslurm.so\"
-INCLUDES = -I$(top_srcdir)
+INCLUDES = -I$(top_srcdir) $(HWLOC_CPPFLAGS)
slurmd_LDADD = \
$(top_builddir)/src/common/libdaemonize.la \
$(top_builddir)/src/api/libslurm.o $(DL_LIBS) \
- $(PLPA_LIBS) \
+ $(PLPA_LIBS) $(HWLOC_LDFLAGS) $(HWLOC_LIBS) \
../common/libslurmd_common.la
SLURMD_SOURCES = \
diff --git a/src/slurmd/slurmd/Makefile.in b/src/slurmd/slurmd/Makefile.in
index ea39c06..dd7d8da 100644
--- a/src/slurmd/slurmd/Makefile.in
+++ b/src/slurmd/slurmd/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -95,6 +95,7 @@
am__DEPENDENCIES_1 =
slurmd_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \
$(top_builddir)/src/api/libslurm.o $(am__DEPENDENCIES_1) \
+ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
../common/libslurmd_common.la
slurmd_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(slurmd_LDFLAGS) \
@@ -194,6 +195,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -230,6 +232,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -319,11 +322,11 @@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = foreign
AM_CPPFLAGS = -DLIBSLURM_SO=\"$(libdir)/libslurm.so\"
-INCLUDES = -I$(top_srcdir)
+INCLUDES = -I$(top_srcdir) $(HWLOC_CPPFLAGS)
slurmd_LDADD = \
$(top_builddir)/src/common/libdaemonize.la \
$(top_builddir)/src/api/libslurm.o $(DL_LIBS) \
- $(PLPA_LIBS) \
+ $(PLPA_LIBS) $(HWLOC_LDFLAGS) $(HWLOC_LIBS) \
../common/libslurmd_common.la
SLURMD_SOURCES = \
@@ -417,7 +420,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-slurmd$(EXEEXT): $(slurmd_OBJECTS) $(slurmd_DEPENDENCIES)
+slurmd$(EXEEXT): $(slurmd_OBJECTS) $(slurmd_DEPENDENCIES) $(EXTRA_slurmd_DEPENDENCIES)
@rm -f slurmd$(EXEEXT)
$(slurmd_LINK) $(slurmd_OBJECTS) $(slurmd_LDADD) $(LIBS)
@@ -560,10 +563,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/slurmd/slurmd/get_mach_stat.c b/src/slurmd/slurmd/get_mach_stat.c
index f134662..a5aa56d 100644
--- a/src/slurmd/slurmd/get_mach_stat.c
+++ b/src/slurmd/slurmd/get_mach_stat.c
@@ -68,6 +68,8 @@
#if defined(HAVE_AIX) || defined(__sun) || defined(__APPLE__)
# include <sys/times.h>
# include <sys/types.h>
+#elif defined(__NetBSD__)
+# include <sys/times.h> /* for times(3) */
#else
/* NOTE: Getting the system uptime on AIX uses completely different logic.
* sys/sysinfo.h on AIX defines structures that conflict with SLURM code. */
@@ -99,108 +101,6 @@
/* #define DEBUG_DETAIL 1 */ /* enable detailed debugging within SLURM */
-#if DEBUG_MODULE
-#define DEBUG_DETAIL 1
-#define error printf
-#define debug printf
-#define debug1 printf
-#define debug2 printf
-#define debug3 printf
-#undef xmalloc
-#define xmalloc malloc
-#undef xfree
-#define xfree free
-/* main is used here for testing purposes only: */
-/* % gcc -DDEBUG_MODULE get_mach_stat.c -I../../.. -g -DUSE_CPU_SPEED */
-int
-main(int argc, char * argv[])
-{
- int error_code;
- uint16_t sockets, cores, threads;
- uint16_t block_map_size;
- uint16_t *block_map, *block_map_inv;
- struct config_record this_node;
- char node_name[MAX_SLURM_NAME];
- float speed;
- uint16_t testnumproc = 0;
- uint32_t up_time = 0;
- int days, hours, mins, secs;
- char* _cpuinfo_path = "/proc/cpuinfo";
-
- if (argc > 1) {
- _cpuinfo_path = argv[1];
- testnumproc = 1024; /* since may not match test host */
- }
- debug3("%s:", _cpuinfo_path);
-
- error_code = get_mach_name(node_name);
- if (error_code != 0)
- exit(1); /* The show is all over without a node name */
-
- error_code += get_procs(&this_node.cpus);
- error_code += get_cpuinfo(MAX(this_node.cpus, testnumproc),
- &this_node.sockets,
- &this_node.cores,
- &this_node.threads,
- &block_map_size,
- &block_map, &block_map_inv);
- xfree(block_map); /* not used here */
- xfree(block_map_inv); /* not used here */
- error_code += get_memory(&this_node.real_memory);
- error_code += get_tmp_disk(&this_node.tmp_disk, "/tmp");
- error_code += get_up_time(&up_time);
-#ifdef USE_CPU_SPEED
- error_code += get_speed(&speed);
-#endif
-
- debug3("");
- debug3("NodeName=%s CPUs=%u Sockets=%u Cores=%u Threads=%u",
- node_name, this_node.cpus,
- this_node.sockets, this_node.cores, this_node.threads);
- debug3("\tRealMemory=%u TmpDisk=%u Speed=%f",
- this_node.real_memory, this_node.tmp_disk, speed);
- secs = up_time % 60;
- mins = (up_time / 60) % 60;
- hours = (up_time / 3600) % 24;
- days = (up_time / 86400);
- debug3("\tUpTime=%u=%u-%2.2u:%2.2u:%2.2u",
- up_time, days, hours, mins, secs);
- if (error_code != 0)
- debug3("get_mach_stat error_code=%d encountered", error_code);
- exit (error_code);
-}
-
-
-/* gethostname_short - equivalent to gethostname, but return only the first
- * component of the fully qualified name
- * (e.g. "linux123.foo.bar" becomes "linux123")
- * OUT name
- */
-int
-gethostname_short (char *name, size_t len)
-{
- int error_code, name_len;
- char *dot_ptr, path_name[1024];
-
- error_code = gethostname (path_name, sizeof(path_name));
- if (error_code)
- return error_code;
-
- dot_ptr = strchr (path_name, '.');
- if (dot_ptr == NULL)
- dot_ptr = path_name + strlen(path_name);
- else
- dot_ptr[0] = '\0';
-
- name_len = (dot_ptr - path_name);
- if (name_len > len)
- return ENAMETOOLONG;
-
- strcpy (name, path_name);
- return 0;
-}
-#endif
-
#ifdef USE_OS_NAME
/*
* get_os_name - Return the operating system name and version
@@ -282,7 +182,7 @@
int mem;
size_t len = sizeof(mem);
if (sysctlbyname("hw.physmem", &mem, &len, NULL, 0) == -1) {
- error("get_procs: error running sysctl(HW_PHYSMEM)");
+ error("get_memory: error running sysctl(HW_PHYSMEM)");
return EINVAL;
}
*real_memory = mem;
@@ -343,7 +243,7 @@
extern int get_up_time(uint32_t *up_time)
{
-#if defined(HAVE_AIX) || defined(__sun) || defined(__APPLE__)
+#if defined(HAVE_AIX) || defined(__sun) || defined(__APPLE__) || defined(__NetBSD__)
clock_t tm;
struct tms buf;
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index add5e03..64117da 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -53,6 +53,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <sys/un.h>
#include <utime.h>
#include <grp.h>
@@ -78,6 +79,7 @@
#include "src/common/util-net.h"
#include "src/common/xstring.h"
#include "src/common/xmalloc.h"
+#include "src/common/plugstack.h"
#include "src/slurmd/slurmd/slurmd.h"
#include "src/slurmd/slurmd/reverse_tree_math.h"
@@ -148,6 +150,7 @@
static void _rpc_update_time(slurm_msg_t *);
static void _rpc_shutdown(slurm_msg_t *msg);
static void _rpc_reconfig(slurm_msg_t *msg);
+static void _rpc_reboot(slurm_msg_t *msg);
static void _rpc_pid2jid(slurm_msg_t *msg);
static int _rpc_file_bcast(slurm_msg_t *msg);
static int _rpc_ping(slurm_msg_t *);
@@ -160,6 +163,8 @@
char **spank_job_env, uint32_t spank_job_env_size);
static int _run_epilog(uint32_t jobid, uid_t uid, char *resv_id,
char **spank_job_env, uint32_t spank_job_env_size);
+static void _rpc_forward_data(slurm_msg_t *msg);
+
static bool _pause_for_job_completion(uint32_t jobid, char *nodes,
int maxtime);
@@ -314,6 +319,10 @@
last_slurmctld_msg = time(NULL);
/* No body to free */
break;
+ case REQUEST_REBOOT_NODES:
+ _rpc_reboot(msg);
+ slurm_free_reboot_msg(msg->data);
+ break;
case REQUEST_NODE_REGISTRATION_STATUS:
/* Treat as ping (for slurmctld agent, just return SUCCESS) */
rc = _rpc_ping(msg);
@@ -362,6 +371,10 @@
_rpc_job_notify(msg);
slurm_free_job_notify_msg(msg->data);
break;
+ case REQUEST_FORWARD_DATA:
+ _rpc_forward_data(msg);
+ slurm_free_forward_data_msg(msg->data);
+ break;
default:
error("slurmd_req: invalid request msg type %d",
msg->msg_type);
@@ -370,6 +383,19 @@
}
return;
}
+static int _send_slurmd_conf_lite (int fd, slurmd_conf_t *cf)
+{
+ int len;
+ Buf buffer = init_buf(0);
+ pack_slurmd_conf_lite(cf, buffer);
+ len = get_buf_offset(buffer);
+ safe_write(fd, &len, sizeof(int));
+ safe_write(fd, get_buf_data(buffer), len);
+ free_buf(buffer);
+ return (0);
+ rwfail:
+ return (-1);
+}
static int
_send_slurmstepd_init(int fd, slurmd_step_type_t type, void *req,
@@ -405,6 +431,15 @@
children = 0;
depth = 0;
max_depth = 0;
+ } else if ((type == LAUNCH_TASKS) &&
+ (((launch_tasks_request_msg_t *)req)->alias_list)) {
+ /* In the cloud, each task talks directly to the slurmctld
+ * since node addressing is abnormal */
+ rank = 0;
+ parent_rank = -1;
+ children = 0;
+ depth = 0;
+ max_depth = 0;
} else {
#ifndef HAVE_FRONT_END
int count;
@@ -416,7 +451,7 @@
if (rank > 0) { /* rank 0 talks directly to the slurmctld */
int rc;
/* Find the slurm_addr_t of this node's parent slurmd
- in the step host list */
+ * in the step host list */
parent_alias = hostset_nth(step_hset, parent_rank);
rc = slurm_conf_get_addr(parent_alias, &parent_addr);
if (rc != SLURM_SUCCESS) {
@@ -455,12 +490,8 @@
safe_write(fd, &parent_addr, sizeof(slurm_addr_t));
/* send conf over to slurmstepd */
- buffer = init_buf(0);
- pack_slurmd_conf_lite(conf, buffer);
- len = get_buf_offset(buffer);
- safe_write(fd, &len, sizeof(int));
- safe_write(fd, get_buf_data(buffer), len);
- free_buf(buffer);
+ if (_send_slurmd_conf_lite(fd, conf) < 0)
+ goto rwfail;
/* send cli address over to slurmstepd */
buffer = init_buf(0);
@@ -471,7 +502,7 @@
free_buf(buffer);
/* send self address over to slurmstepd */
- if(self) {
+ if (self) {
buffer = init_buf(0);
slurm_pack_slurm_addr(self, buffer);
len = get_buf_offset(buffer);
@@ -483,6 +514,9 @@
safe_write(fd, &len, sizeof(int));
}
+ /* Send GRES information to slurmstepd */
+ gres_plugin_send_stepd(fd);
+
/* send req over to slurmstepd */
switch(type) {
case LAUNCH_BATCH_JOB:
@@ -504,6 +538,7 @@
}
buffer = init_buf(0);
msg.data = req;
+ msg.protocol_version = SLURM_PROTOCOL_VERSION;
pack_msg(&msg, buffer);
len = get_buf_offset(buffer);
safe_write(fd, &len, sizeof(int));
@@ -628,16 +663,8 @@
error("close read to_slurmd in parent: %m");
return rc;
} else {
- char slurm_stepd_path[MAXPATHLEN];
- char *const argv[2] = { slurm_stepd_path, NULL};
+ char *const argv[2] = { (char *)conf->stepd_loc, NULL};
int failed = 0;
- if (conf->stepd_loc) {
- snprintf(slurm_stepd_path, sizeof(slurm_stepd_path),
- "%s", conf->stepd_loc);
- } else {
- snprintf(slurm_stepd_path, sizeof(slurm_stepd_path),
- "%s/sbin/slurmstepd", SLURM_PREFIX);
- }
/* inform slurmstepd about our config */
setenv("SLURM_CONF", conf->conffile, 1);
@@ -701,7 +728,7 @@
int node_id, hostset_t *step_hset)
{
slurm_cred_arg_t arg;
- hostset_t j_hset = NULL, s_hset = NULL;
+ hostset_t s_hset = NULL;
bool user_ok = _slurm_authorized_user(uid);
bool verified = true;
int host_index = -1;
@@ -771,6 +798,10 @@
uint32_t hi, i, i_first_bit=0, i_last_bit=0, j;
bool cpu_log = slurm_get_debug_flags() & DEBUG_FLAG_CPU_BIND;
+#ifdef HAVE_FRONT_END
+ host_index = 0; /* It is always 0 for front end systems */
+#else
+ hostset_t j_hset;
/* Determine the CPU count based upon this node's index into
* the _job's_ allocation (job's hostlist and core_bitmap) */
if (!(j_hset = hostset_create(arg.job_hostlist))) {
@@ -778,15 +809,14 @@
arg.job_hostlist);
goto fail;
}
-
host_index = hostset_find(j_hset, conf->node_name);
+ hostset_destroy(j_hset);
if ((host_index < 0) || (host_index >= arg.job_nhosts)) {
error("job cr credential invalid host_index %d for "
"job %u", host_index, arg.jobid);
goto fail;
}
- hostset_destroy(j_hset);
- j_hset = NULL;
+#endif
if (cpu_log) {
char *per_job = "", *per_step = "";
@@ -904,8 +934,6 @@
return SLURM_SUCCESS;
fail:
- if (j_hset)
- hostset_destroy(j_hset);
if (s_hset)
hostset_destroy(s_hset);
*step_hset = NULL;
@@ -931,8 +959,11 @@
socklen_t adlen;
hostset_t step_hset = NULL;
job_mem_limits_t *job_limits_ptr;
- int nodeid = nodelist_find(req->complete_nodelist, conf->node_name);
-
+ int nodeid = 0;
+#ifndef HAVE_FRONT_END
+ /* It is always 0 for front end systems */
+ nodeid = nodelist_find(req->complete_nodelist, conf->node_name);
+#endif
req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
memcpy(&req->orig_addr, &msg->orig_addr, sizeof(slurm_addr_t));
@@ -1498,6 +1529,42 @@
/* Never return a message, slurmctld does not expect one */
}
+static void
+_rpc_reboot(slurm_msg_t *msg)
+{
+ char *reboot_program, *sp;
+ slurm_ctl_conf_t *cfg;
+ uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
+ int exit_code;
+
+ if (!_slurm_authorized_user(req_uid))
+ error("Security violation, reboot RPC from uid %d",
+ req_uid);
+ else {
+ cfg = slurm_conf_lock();
+ reboot_program = cfg->reboot_program;
+ if (reboot_program) {
+ sp = strchr(reboot_program, ' ');
+ if (sp)
+ sp = xstrndup(reboot_program,
+ (sp - reboot_program));
+ else
+ sp = xstrdup(reboot_program);
+ if (access(sp, R_OK | X_OK) < 0)
+ error("Cannot run RebootProgram [%s]: %m", sp);
+ else if ((exit_code = system(reboot_program)))
+ error("system(%s) returned %d", reboot_program,
+ exit_code);
+ xfree(sp);
+ } else
+ error("RebootProgram isn't defined in config");
+ slurm_conf_unlock();
+ }
+
+ /* Never return a message, slurmctld does not expect one */
+ /* slurm_send_rc_msg(msg, rc); */
+}
+
static void _job_limits_free(void *x)
{
xfree(x);
@@ -3306,6 +3373,8 @@
int delay;
char *resv_id = NULL;
slurm_ctl_conf_t *cf;
+ bool have_spank = false;
+ struct stat stat_buf;
debug("_rpc_terminate_job, uid = %d", uid);
/*
@@ -3397,6 +3466,12 @@
_kill_all_active_steps(req->job_id, SIGTERM, true);
}
+ cf = slurm_conf_lock();
+ delay = MAX(cf->kill_wait, 5);
+ if (cf->plugstack && (stat(cf->plugstack, &stat_buf) == 0))
+ have_spank = true;
+ slurm_conf_unlock();
+
/*
* If there are currently no active job steps and no
* configured epilog to run, bypass asynchronous reply and
@@ -3404,8 +3479,9 @@
* request. We need to send current switch state on AIX
* systems, so this bypass can not be used.
*/
+
#ifndef HAVE_AIX
- if ((nsteps == 0) && !conf->epilog) {
+ if ((nsteps == 0) && !conf->epilog && !have_spank) {
debug4("sent ALREADY_COMPLETE");
if (msg->conn_fd >= 0)
slurm_send_rc_msg(msg,
@@ -3446,11 +3522,8 @@
/*
* Check for corpses
*/
- cf = slurm_conf_lock();
- delay = MAX(cf->kill_wait, 5);
- slurm_conf_unlock();
- if ( !_pause_for_job_completion (req->job_id, req->nodes, delay)
- && (xcpu_signal(SIGKILL, req->nodes) +
+ if ( !_pause_for_job_completion (req->job_id, req->nodes, delay) &&
+ (xcpu_signal(SIGKILL, req->nodes) +
_terminate_all_steps(req->job_id, true)) ) {
/*
* Block until all user processes are complete.
@@ -3715,6 +3788,7 @@
if (spank_job_env_size)
env_array_merge(&env, (const char **) spank_job_env);
+ setenvf(&env, "SLURM_CONF", conf->conffile);
setenvf(&env, "SLURM_JOB_ID", "%u", jobid);
setenvf(&env, "SLURM_JOB_UID", "%u", uid);
name = uid_to_string(uid);
@@ -3722,6 +3796,11 @@
xfree(name);
setenvf(&env, "SLURM_JOBID", "%u", jobid);
setenvf(&env, "SLURM_UID", "%u", uid);
+
+ slurm_mutex_lock(&conf->config_mutex);
+ setenvf(&env, "SLURMD_NODENAME", "%s", conf->node_name);
+ slurm_mutex_unlock(&conf->config_mutex);
+
if (resv_id) {
#if defined(HAVE_BG)
setenvf(&env, "MPIRUN_PARTITION", "%s", resv_id);
@@ -3751,6 +3830,86 @@
return;
}
+static int
+run_spank_job_script (const char *mode, char **env)
+{
+ pid_t cpid;
+ int status = 0;
+ int pfds[2];
+
+ if (pipe (pfds) < 0) {
+ error ("run_spank_job_script: pipe: %m");
+ return (-1);
+ }
+
+ fd_set_close_on_exec (pfds[1]);
+
+ debug ("Calling %s spank %s", conf->stepd_loc, mode);
+ if ((cpid = fork ()) < 0) {
+ error ("executing spank %s: %m", mode);
+ return (-1);
+ }
+ if (cpid == 0) {
+ /* Run slurmstepd spank [prolog|epilog] */
+ char *argv[4] = {
+ (char *) conf->stepd_loc,
+ "spank",
+ (char *) mode,
+ NULL };
+
+ if (dup2 (pfds[0], STDIN_FILENO) < 0)
+ fatal ("dup2: %m");
+#ifdef SETPGRP_TWO_ARGS
+ setpgrp(0, 0);
+#else
+ setpgrp();
+#endif
+ execve (argv[0], argv, env);
+ error ("execve: %m");
+ exit (127);
+ }
+
+ close (pfds[0]);
+
+ if (_send_slurmd_conf_lite (pfds[1], conf) < 0)
+ error ("Failed to send slurmd conf to slurmstepd\n");
+ close (pfds[1]);
+
+ /*
+ * Wait for up to 120s for all spank plugins to complete:
+ */
+ if (waitpid_timeout (mode, cpid, &status, 120) < 0) {
+ error ("spank/%s timed out after 120s", mode);
+ return (-1);
+ }
+
+ if (status)
+ error ("spank/%s returned status 0x%04x", mode, status);
+
+ /*
+ * No longer need SPANK option env vars in environment
+ */
+ spank_clear_remote_options_env (env);
+
+ return (status);
+}
+
+static int _run_job_script(const char *name, const char *path,
+ uint32_t jobid, int timeout, char **env)
+{
+ int status, rc;
+ /*
+ * Always run both spank prolog/epilog and real prolog/epilog script,
+ * even if spank plugins fail. (May want to alter this in the future)
+ * If both "script" mechanisms fail, prefer to return the "real"
+ * prolog/epilog status.
+ */
+ status = run_spank_job_script(name, env);
+ if ((rc = run_script(name, path, jobid, timeout, env)))
+ status = rc;
+ return (status);
+}
+
#ifdef HAVE_BG
/* a slow prolog is expected on bluegene systems */
static int
@@ -3767,7 +3926,7 @@
slurm_mutex_unlock(&conf->config_mutex);
_add_job_running_prolog(jobid);
- rc = run_script("prolog", my_prolog, jobid, -1, my_env);
+ rc = _run_job_script("prolog", my_prolog, jobid, -1, my_env);
_remove_job_running_prolog(jobid);
xfree(my_prolog);
_destroy_env(my_env);
@@ -3842,7 +4001,7 @@
timer_struct.timer_cond = &timer_cond;
timer_struct.timer_mutex = &timer_mutex;
pthread_create(&timer_id, &timer_attr, &_prolog_timer, &timer_struct);
- rc = run_script("prolog", my_prolog, jobid, -1, my_env);
+ rc = _run_job_script("prolog", my_prolog, jobid, -1, my_env);
slurm_mutex_lock(&timer_mutex);
prolog_fini = true;
pthread_cond_broadcast(&timer_cond);
@@ -3881,7 +4040,7 @@
slurm_mutex_unlock(&conf->config_mutex);
_wait_for_job_running_prolog(jobid);
- error_code = run_script("epilog", my_epilog, jobid, -1, my_env);
+ error_code = _run_job_script("epilog", my_epilog, jobid, -1, my_env);
xfree(my_epilog);
_destroy_env(my_env);
@@ -4355,3 +4514,59 @@
slurm_mutex_unlock(&conf->prolog_running_lock);
debug( "Finished wait for job %d's prolog to complete", job_id);
}
+
+
+static void
+_rpc_forward_data(slurm_msg_t *msg)
+{
+ forward_data_msg_t *req = (forward_data_msg_t *)msg->data;
+ uint32_t req_uid;
+ struct sockaddr_un sa;
+ int fd = -1, rc;
+
+ debug3("Entering _rpc_forward_data, address: %s, len: %u",
+ req->address, req->len);
+
+ /* sanity check */
+ if (strlen(req->address) > sizeof(sa.sun_path) - 1) {
+ rc = EINVAL;
+ goto done;
+ }
+
+ /* connect to specified address */
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0) {
+ error("failed creating UNIX domain socket: %m");
+ goto done;
+ }
+ memset(&sa, 0, sizeof(sa));
+ sa.sun_family = AF_UNIX;
+ strcpy(sa.sun_path, req->address);
+ while ((rc = connect(fd, (struct sockaddr *)&sa, SUN_LEN(&sa)) < 0) &&
+ (errno == EINTR));
+ if (rc < 0) {
+ debug2("failed connecting to specified socket '%s': %m",
+ req->address);
+ goto done;
+ }
+
+ req_uid = (uint32_t)g_slurm_auth_get_uid(msg->auth_cred, NULL);
+ /*
+ * although always in localhost, we still convert it to network
+ * byte order, to make it consistent with pack/unpack.
+ */
+ req_uid = htonl(req_uid);
+ safe_write(fd, &req_uid, sizeof(uint32_t));
+ req_uid = htonl(req->len);
+ safe_write(fd, &req_uid, sizeof(uint32_t));
+ safe_write(fd, req->data, req->len);
+
+rwfail:
+done:
+ if (fd >= 0)
+ close(fd);
+ rc = errno;
+ slurm_send_rc_msg(msg, rc);
+}
+
+
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index dfd92a9..bdb9ae4 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -58,6 +58,9 @@
#include <stdlib.h>
#include <sys/mman.h>
#include <dlfcn.h>
+#if HAVE_HWLOC
+#include <hwloc.h>
+#endif
#include "src/common/bitstring.h"
#include "src/common/daemonize.h"
@@ -89,6 +92,7 @@
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/common/xsignal.h"
+#include "src/common/plugstack.h"
#include "src/slurmd/slurmd/slurmd.h"
#include "src/slurmd/slurmd/req.h"
@@ -195,7 +199,7 @@
"setgroups: %m");
}
} else {
- info("Not running as root. Can't drop supplementary groups");
+ debug("Not running as root. Can't drop supplementary groups");
}
/*
@@ -601,12 +605,12 @@
if (first_msg) {
first_msg = false;
- info("Procs=%u Sockets=%u Cores=%u Threads=%u "
+ info("CPUs=%u Sockets=%u Cores=%u Threads=%u "
"Memory=%u TmpDisk=%u Uptime=%u",
msg->cpus, msg->sockets, msg->cores, msg->threads,
msg->real_memory, msg->tmp_disk, msg->up_time);
} else {
- debug3("Procs=%u Sockets=%u Cores=%u Threads=%u "
+ debug3("CPUs=%u Sockets=%u Cores=%u Threads=%u "
"Memory=%u TmpDisk=%u Uptime=%u",
msg->cpus, msg->sockets, msg->cores, msg->threads,
msg->real_memory, msg->tmp_disk, msg->up_time);
@@ -691,6 +695,135 @@
}
}
+#if HAVE_HWLOC
+/*
+ * _get_hwlocinfo - Return detailed cpuinfo on this system using hwloc library
+ * compatible with get_cpuinfo() in common/xcpuinfo.c
+ * Input: numproc - number of processors on the system
+ * Output: p_sockets - number of physical processor sockets
+ * p_cores - total number of physical CPU cores
+ * p_threads - total number of hardware execution threads
+ * p_block_map - abstract->physical block distribution map
+ * p_block_map_inv - physical->abstract block distribution map (inverse)
+ * return code - 0 if no error, otherwise errno
+ * NOTE: User must xfree *p_block_map and *p_block_map_inv
+ */
+static int
+_get_hwlocinfo(uint16_t *p_numproc,
+ uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
+ uint16_t *p_block_map_size,
+ uint16_t **p_block_map, uint16_t **p_block_map_inv)
+{
+ enum { SOCKET=0, CORE=1, PU=2 };
+ hwloc_topology_t topology;
+ hwloc_obj_t obj;
+ hwloc_obj_type_t objtype[3];
+ unsigned idx[3];
+ int nobj[3];
+ int actual_cpus;
+ int macid;
+ int absid;
+ int i;
+
+ debug("hwloc_topology_init");
+ if (hwloc_topology_init(&topology)) {
+ /* error in initialize hwloc library */
+ debug("hwloc_topology_init() failed.");
+ return 1;
+ }
+
+ /* parse all system */
+ hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);
+
+ /* ignores cache, group, misc */
+ hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE);
+ hwloc_topology_ignore_type (topology, HWLOC_OBJ_GROUP);
+ hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC);
+
+ /* load topology */
+ debug("hwloc_topology_load");
+ if (hwloc_topology_load(topology)) {
+ /* error in load hardware topology */
+ debug("hwloc_topology_load() failed.");
+ hwloc_topology_destroy(topology);
+ return 2;
+ }
+
+ if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
+ hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) {
+ /* One socket contains multiple NUMA-nodes
+ * like AMD Opteron 6000 series etc.
+ * In such case, use NUMA-node instead of socket. */
+ objtype[SOCKET] = HWLOC_OBJ_NODE;
+ objtype[CORE] = HWLOC_OBJ_CORE;
+ objtype[PU] = HWLOC_OBJ_PU;
+ } else {
+ objtype[SOCKET] = HWLOC_OBJ_SOCKET;
+ objtype[CORE] = HWLOC_OBJ_CORE;
+ objtype[PU] = HWLOC_OBJ_PU;
+ }
+
+ /* number of objects */
+ nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]);
+ nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);
+ actual_cpus = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
+ nobj[PU] = actual_cpus/nobj[CORE]; /* threads per core */
+ nobj[CORE] /= nobj[SOCKET]; /* cores per socket */
+
+ debug4("Total %d CPUs, %d sockets, %d core/socket, %d pu/core",
+ actual_cpus, nobj[SOCKET], nobj[CORE], nobj[PU]);
+
+ /* allocate block_map */
+ *p_block_map_size = (uint16_t)actual_cpus;
+ if (p_block_map && p_block_map_inv) {
+ *p_block_map = xmalloc(actual_cpus * sizeof(uint16_t));
+ *p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));
+
+ /* initialize default as linear mapping */
+ for (i = 0; i < actual_cpus; i++) {
+ (*p_block_map)[i] = i;
+ (*p_block_map_inv)[i] = i;
+ }
+
+ /* create map with hwloc */
+ for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) {
+ for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) {
+ for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
+ /* get hwloc_obj by indexes */
+ obj=hwloc_get_obj_below_array_by_type(
+ topology, 3, objtype, idx);
+ if (!obj)
+ continue;
+ macid = obj->os_index;
+ absid = idx[SOCKET]*nobj[CORE]*nobj[PU]
+ + idx[CORE]*nobj[PU]
+ + idx[PU];
+
+ if ((macid >= actual_cpus) ||
+ (absid >= actual_cpus)) {
+ /* physical or logical ID are
+ * out of range */
+ continue;
+ }
+ debug4("CPU map[%d]=>%d", absid, macid);
+ (*p_block_map)[absid] = macid;
+ (*p_block_map_inv)[macid] = absid;
+ }
+ }
+ }
+ }
+
+ hwloc_topology_destroy(topology);
+
+ /* update output parameters */
+ *p_numproc = actual_cpus;
+ *p_sockets = nobj[SOCKET];
+ *p_cores = nobj[CORE];
+ *p_threads = nobj[PU];
+ return 0;
+}
+#endif /* HAVE_HWLOC */
+
/*
* Read the slurm configuration file (slurm.conf) and substitute some
* values into the slurmd configuration in preference of the defaults.
@@ -766,6 +899,14 @@
_update_logging();
_update_nice();
+#if HAVE_HWLOC
+ _get_hwlocinfo(&conf->actual_cpus,
+ &conf->actual_sockets,
+ &conf->actual_cores,
+ &conf->actual_threads,
+ &conf->block_map_size,
+ &conf->block_map, &conf->block_map_inv);
+#else
get_procs(&conf->actual_cpus);
get_cpuinfo(conf->actual_cpus,
&conf->actual_sockets,
@@ -773,6 +914,7 @@
&conf->actual_threads,
&conf->block_map_size,
&conf->block_map, &conf->block_map_inv);
+#endif
#ifdef HAVE_FRONT_END
/*
* When running with multiple frontends, the slurmd S:C:T values are not
@@ -811,7 +953,7 @@
(conf->threads != conf->actual_threads)) {
if (cf->fast_schedule) {
info("Node configuration differs from hardware\n"
- " Procs=%u:%u(hw) Sockets=%u:%u(hw)\n"
+ " CPUs=%u:%u(hw) Sockets=%u:%u(hw)\n"
" CoresPerSocket=%u:%u(hw) "
"ThreadsPerCore=%u:%u(hw)",
conf->cpus, conf->actual_cpus,
@@ -825,7 +967,7 @@
"will be what is in the slurm.conf because of "
"the bitmaps the slurmctld must create before "
"the slurmd registers.\n"
- " Procs=%u:%u(hw) Sockets=%u:%u(hw)\n"
+ " CPUs=%u:%u(hw) Sockets=%u:%u(hw)\n"
" CoresPerSocket=%u:%u(hw) "
"ThreadsPerCore=%u:%u(hw)",
conf->cpus, conf->actual_cpus,
@@ -1090,11 +1232,19 @@
static void
_print_config(void)
{
+ int days, hours, mins, secs;
char name[128];
gethostname_short(name, sizeof(name));
printf("NodeName=%s ", name);
-
+#if HAVE_HWLOC
+ _get_hwlocinfo(&conf->actual_cpus,
+ &conf->actual_sockets,
+ &conf->actual_cores,
+ &conf->actual_threads,
+ &conf->block_map_size,
+ &conf->block_map, &conf->block_map_inv);
+#else
get_procs(&conf->actual_cpus);
get_cpuinfo(conf->actual_cpus,
&conf->actual_sockets,
@@ -1102,7 +1252,8 @@
&conf->actual_threads,
&conf->block_map_size,
&conf->block_map, &conf->block_map_inv);
- printf("Procs=%u Sockets=%u CoresPerSocket=%u ThreadsPerCore=%u ",
+#endif
+ printf("CPUs=%u Sockets=%u CoresPerSocket=%u ThreadsPerCore=%u ",
conf->actual_cpus, conf->actual_sockets, conf->actual_cores,
conf->actual_threads);
@@ -1110,6 +1261,13 @@
get_tmp_disk(&conf->tmp_disk_space, "/tmp");
printf("RealMemory=%u TmpDisk=%u\n",
conf->real_memory_size, conf->tmp_disk_space);
+
+ get_up_time(&conf->up_time);
+ secs = conf->up_time % 60;
+ mins = (conf->up_time / 60) % 60;
+ hours = (conf->up_time / 3600) % 24;
+ days = (conf->up_time / 86400);
+ printf("UpTime=%u-%2.2u:%2.2u:%2.2u\n", days, hours, mins, secs);
}
static void
@@ -1172,6 +1330,14 @@
break;
}
}
+
+ /*
+ * If slurmstepd path wasn't overridden by command line, set
+ * it to the default here:
+ */
+ if (!conf->stepd_loc)
+ conf->stepd_loc =
+ xstrdup_printf("%s/sbin/slurmstepd", SLURM_PREFIX);
}
@@ -1210,7 +1376,6 @@
struct rlimit rlim;
slurm_ctl_conf_t *cf;
struct stat stat_buf;
- char slurm_stepd_path[MAXPATHLEN];
uint32_t cpu_cnt;
/*
@@ -1257,12 +1422,16 @@
_set_topo_info();
_print_conf();
+ if (slurm_jobacct_gather_init() != SLURM_SUCCESS)
+ return SLURM_FAILURE;
if (slurm_proctrack_init() != SLURM_SUCCESS)
return SLURM_FAILURE;
if (slurmd_task_init() != SLURM_SUCCESS)
return SLURM_FAILURE;
if (slurm_auth_init(NULL) != SLURM_SUCCESS)
return SLURM_FAILURE;
+ if (spank_slurmd_init() < 0)
+ return SLURM_FAILURE;
if (getrlimit(RLIMIT_CPU, &rlim) == 0) {
rlim.rlim_cur = rlim.rlim_max;
@@ -1308,6 +1477,8 @@
}
if (conf->daemonize) {
+ bool success = false;
+
if (conf->logfile && (conf->logfile[0] == '/')) {
char *slash_ptr, *work_dir;
work_dir = xstrdup(conf->logfile);
@@ -1316,17 +1487,29 @@
work_dir[1] = '\0';
else
slash_ptr[0] = '\0';
- if (chdir(work_dir) < 0) {
+ if ((access(work_dir, W_OK) != 0) ||
+ (chdir(work_dir) < 0)) {
error("Unable to chdir to %s", work_dir);
- xfree(work_dir);
- return SLURM_FAILURE;
- }
+ } else
+ success = true;
xfree(work_dir);
- } else {
- if (chdir(conf->spooldir) < 0) {
+ }
+
+ if (!success) {
+ if ((access(conf->spooldir, W_OK) != 0) ||
+ (chdir(conf->spooldir) < 0)) {
error("Unable to chdir to %s", conf->spooldir);
+ } else
+ success = true;
+ }
+
+ if (!success) {
+ if ((access("/var/tmp", W_OK) != 0) ||
+ (chdir("/var/tmp") < 0)) {
+ error("chdir(/var/tmp): %m");
return SLURM_FAILURE;
- }
+ } else
+ info("chdir to /var/tmp");
}
}
@@ -1347,21 +1530,10 @@
fd_set_close_on_exec(devnull);
/* make sure we have slurmstepd installed */
- if (conf->stepd_loc) {
- snprintf(slurm_stepd_path, sizeof(slurm_stepd_path),
- "%s", conf->stepd_loc);
- } else {
- snprintf(slurm_stepd_path, sizeof(slurm_stepd_path),
- "%s/sbin/slurmstepd", SLURM_PREFIX);
- }
- if (stat(slurm_stepd_path, &stat_buf)) {
- fatal("Unable to find slurmstepd file at %s",
- slurm_stepd_path);
- }
- if (!S_ISREG(stat_buf.st_mode)) {
- fatal("slurmstepd not a file at %s",
- slurm_stepd_path);
- }
+ if (stat(conf->stepd_loc, &stat_buf))
+ fatal("Unable to find slurmstepd file at %s", conf->stepd_loc);
+ if (!S_ISREG(stat_buf.st_mode))
+ fatal("slurmstepd not a file at %s", conf->stepd_loc);
return SLURM_SUCCESS;
}
@@ -1432,6 +1604,7 @@
fini_setproctitle();
slurm_select_fini();
slurm_jobacct_gather_fini();
+ spank_slurmd_exit();
return SLURM_SUCCESS;
}
diff --git a/src/slurmd/slurmd/slurmd.h b/src/slurmd/slurmd/slurmd.h
index 1e07aa4..d5eb541 100644
--- a/src/slurmd/slurmd/slurmd.h
+++ b/src/slurmd/slurmd/slurmd.h
@@ -111,7 +111,7 @@
char *pubkey; /* location of job cred public key */
char *epilog; /* Path to Epilog script */
char *prolog; /* Path to prolog script */
- char *stepd_loc; /* Non-standard slurmstepd path */
+ char *stepd_loc; /* slurmstepd path */
char *task_prolog; /* per-task prolog script */
char *task_epilog; /* per-task epilog script */
int port; /* local slurmd port */
diff --git a/src/slurmd/slurmstepd/Makefile.am b/src/slurmd/slurmstepd/Makefile.am
index 31d7b1a..bfb98d5 100644
--- a/src/slurmd/slurmstepd/Makefile.am
+++ b/src/slurmd/slurmstepd/Makefile.am
@@ -36,5 +36,5 @@
endif
force:
-$(slurmd_LDADD) : force
+$(slurmstepd_LDADD) : force
@cd `dirname $@` && $(MAKE) `basename $@`
diff --git a/src/slurmd/slurmstepd/Makefile.in b/src/slurmd/slurmstepd/Makefile.in
index 36f2992..eb0ef10 100644
--- a/src/slurmd/slurmstepd/Makefile.in
+++ b/src/slurmd/slurmstepd/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -197,6 +197,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -233,6 +234,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -424,7 +426,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-slurmstepd$(EXEEXT): $(slurmstepd_OBJECTS) $(slurmstepd_DEPENDENCIES)
+slurmstepd$(EXEEXT): $(slurmstepd_OBJECTS) $(slurmstepd_DEPENDENCIES) $(EXTRA_slurmstepd_DEPENDENCIES)
@rm -f slurmstepd$(EXEEXT)
$(slurmstepd_LINK) $(slurmstepd_OBJECTS) $(slurmstepd_LDADD) $(LIBS)
@@ -573,10 +575,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
@@ -677,7 +684,7 @@
force:
-$(slurmd_LDADD) : force
+$(slurmstepd_LDADD) : force
@cd `dirname $@` && $(MAKE) `basename $@`
# Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/src/slurmd/slurmstepd/io.c b/src/slurmd/slurmstepd/io.c
index 8942700..622fb40 100644
--- a/src/slurmd/slurmstepd/io.c
+++ b/src/slurmd/slurmstepd/io.c
@@ -276,7 +276,7 @@
}
/* If this is a newly attached client its msg_queue needs
- * to be intialized from the outgoing_cache, and then "obj" needs
+ * to be initialized from the outgoing_cache, and then "obj" needs
* to be added to the List of clients.
*/
if (client->msg_queue == NULL) {
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 54f442b..0799875 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -172,7 +172,7 @@
static void _send_launch_failure(launch_tasks_request_msg_t *,
slurm_addr_t *, int);
static int _drain_node(char *reason);
-static int _fork_all_tasks(slurmd_job_t *job);
+static int _fork_all_tasks(slurmd_job_t *job, bool *io_initialized);
static int _become_user(slurmd_job_t *job, struct priv_state *ps);
static void _set_prio_process (slurmd_job_t *job);
static void _set_job_log_prefix(slurmd_job_t *job);
@@ -591,6 +591,7 @@
static void
_random_sleep(slurmd_job_t *job)
{
+#if !defined HAVE_FRONT_END
long int delay = 0;
long int max = (3 * job->nnodes);
@@ -598,8 +599,8 @@
delay = lrand48() % ( max + 1 );
debug3("delaying %ldms", delay);
- if (poll(NULL, 0, delay) == -1)
- return;
+ poll(NULL, 0, delay);
+#endif
}
/*
@@ -626,8 +627,8 @@
resp.msg_type = MESSAGE_TASK_EXIT;
/*
- * XXX Hack for TCP timeouts on exit of large, synchronized
- * jobs. Delay a random amount if job->nnodes > 100
+ * Hack for TCP timeouts on exit of large, synchronized job
+ * termination. Delay a random amount if job->nnodes > 100
*/
if (job->nnodes > 100)
_random_sleep(job);
@@ -886,12 +887,18 @@
int
job_manager(slurmd_job_t *job)
{
- int rc = 0;
+ int rc = SLURM_SUCCESS;
bool io_initialized = false;
char *ckpt_type = slurm_get_checkpoint_type();
debug3("Entered job_manager for %u.%u pid=%d",
job->jobid, job->stepid, job->jmgr_pid);
+
+#ifdef PR_SET_DUMPABLE
+ if (prctl(PR_SET_DUMPABLE, 1) < 0)
+ debug ("Unable to set dumpable to 1");
+#endif /* PR_SET_DUMPABLE */
+
/*
* Preload plugins.
*/
@@ -914,69 +921,59 @@
goto fail1;
}
-#ifdef HAVE_CRAY
- /*
- * We need to call the proctrack/sgi_job container-create function here
- * already since the select/cray plugin needs the job container ID in
- * order to CONFIRM the ALPS reservation.
- * It is not a good idea to perform this setup in _fork_all_tasks(),
- * since any transient failure of ALPS (which can happen in practice)
- * will then set the frontend node to DRAIN.
- */
if ((job->cont_id == 0) &&
(slurm_container_create(job) != SLURM_SUCCESS)) {
- error("failed to create proctrack/sgi_job container: %m");
+ error("slurm_container_create: %m");
rc = ESLURMD_SETUP_ENVIRONMENT_ERROR;
goto fail1;
}
- rc = _select_cray_plugin_job_ready(job);
- if (rc != SLURM_SUCCESS) {
- /*
- * Transient error: slurmctld knows this condition to mean that
- * the ALPS (not the SLURM) reservation failed and tries again.
- */
- if (rc == READY_JOB_ERROR)
- rc = ESLURM_RESERVATION_NOT_USABLE;
- else
- rc = ESLURMD_SETUP_ENVIRONMENT_ERROR;
- error("could not confirm ALPS reservation #%u", job->resv_id);
- goto fail1;
+#ifdef HAVE_CRAY
+ /*
+ * Note that the previously called slurm_container_create function is
+ * mandatory since the select/cray plugin needs the job container
+ * ID in order to CONFIRM the ALPS reservation.
+ * It is not a good idea to perform this setup in _fork_all_tasks(),
+ * since any transient failure of ALPS (which can happen in practice)
+ * will then set the frontend node to DRAIN.
+ *
+ * Also note that we do not check the reservation for batch jobs with
+ * a reservation ID of zero and no CPUs. These are SLURM job
+ * allocations containing no compute nodes and thus have no ALPS
+ * reservation.
+ */
+ if (!job->batch || job->resv_id || job->cpus) {
+ rc = _select_cray_plugin_job_ready(job);
+ if (rc != SLURM_SUCCESS) {
+ /*
+ * Transient error: slurmctld knows this condition to
+ * mean that the ALPS (not the SLURM) reservation
+ * failed and tries again.
+ */
+ if (rc == READY_JOB_ERROR)
+ rc = ESLURM_RESERVATION_NOT_USABLE;
+ else
+ rc = ESLURMD_SETUP_ENVIRONMENT_ERROR;
+ error("could not confirm ALPS reservation #%u",
+ job->resv_id);
+ goto fail1;
+ }
}
#endif
-#ifdef PR_SET_DUMPABLE
- if (prctl(PR_SET_DUMPABLE, 1) < 0)
- debug ("Unable to set dumpable to 1");
-#endif /* PR_SET_DUMPABLE */
-
- set_umask(job); /* set umask for stdout/err files */
- if (job->user_managed_io)
- rc = _setup_user_managed_io(job);
- else
- rc = _setup_normal_io(job);
- /*
- * Initialize log facility to copy errors back to srun
- */
- if(!rc)
- rc = _slurmd_job_log_init(job);
-
- if (rc) {
- error("IO setup failed: %m");
- job->task[0]->estatus = 0x0100;
- step_complete.step_rc = 0x0100;
- rc = SLURM_SUCCESS; /* drains node otherwise */
- goto fail2;
- } else {
- io_initialized = true;
+ debug2("Before call to spank_init()");
+ if (spank_init (job) < 0) {
+ error ("Plugin stack initialization failed.");
+ rc = SLURM_PLUGIN_NAME_INVALID;
+ goto fail1;
}
+ debug2("After call to spank_init()");
/* Call interconnect_init() before becoming user */
if (!job->batch &&
(interconnect_init(job->switch_job, job->uid) < 0)) {
/* error("interconnect_init: %m"); already logged */
rc = ESLURM_INTERCONNECT_FAILURE;
- io_close_task_fds(job);
goto fail2;
}
@@ -988,13 +985,26 @@
goto fail2;
}
- /* calls pam_setup() and requires pam_finish() if successful */
- if (_fork_all_tasks(job) < 0) {
- debug("_fork_all_tasks failed");
- rc = ESLURMD_EXECVE_FAILED;
- io_close_task_fds(job);
+ /* fork necessary threads for MPI */
+ if (mpi_hook_slurmstepd_prefork(job, &job->env) != SLURM_SUCCESS) {
+ error("Failed mpi_hook_slurmstepd_prefork");
+ rc = SLURM_FAILURE;
goto fail2;
}
+
+ /* calls pam_setup() and requires pam_finish() if successful */
+ if ((rc = _fork_all_tasks(job, &io_initialized)) < 0) {
+ debug("_fork_all_tasks failed");
+ rc = ESLURMD_EXECVE_FAILED;
+ goto fail2;
+ }
+
+ /*
+ * If IO initialization failed, return SLURM_SUCCESS
+ * or the node will be drained otherwise
+ */
+ if ((rc == SLURM_SUCCESS) && !io_initialized)
+ goto fail2;
io_close_task_fds(job);
@@ -1017,12 +1027,6 @@
job->state = SLURMSTEPD_STEP_ENDING;
- /*
- * This just cleans up all of the PAM state and errors are logged
- * below, so there's no need for error handling.
- */
- pam_finish();
-
if (!job->batch &&
(interconnect_fini(job->switch_job) < 0)) {
error("interconnect_fini: %m");
@@ -1063,6 +1067,15 @@
*/
post_step(job);
+ /*
+ * This just cleans up all of the PAM state in case rc == 0
+ * which means _fork_all_tasks performs well.
+ * Must be done after IO termination in case of IO operations
+ * require something provided by the PAM (i.e. security token)
+ */
+ if (!rc)
+ pam_finish();
+
debug2("Before call to spank_fini()");
if (spank_fini (job) < 0) {
error ("spank_fini failed");
@@ -1135,8 +1148,10 @@
if (e == NULL)
return;
- close (e->parentfd);
- close (e->childfd);
+ if (e->parentfd >= 0)
+ close (e->parentfd);
+ if (e->childfd >= 0)
+ close (e->childfd);
e->id = -1;
e->pid = -1;
}
@@ -1159,9 +1174,17 @@
exec_wait_info_destroy (e);
return (NULL);
}
- else if (e->pid == 0) /* In child, close parent fd */
+ /*
+ * Close parentfd in child, and childfd in parent:
+ */
+ if (e->pid == 0) {
close (e->parentfd);
-
+ e->parentfd = -1;
+ }
+ else {
+ close (e->childfd);
+ e->childfd = -1;
+ }
return (e);
}
@@ -1193,6 +1216,47 @@
return (0);
}
+/*
+ * Send SIGKILL to child in exec_wait_info 'e'
+ * Returns 0 for success, -1 for failure.
+ */
+static int exec_wait_kill_child (struct exec_wait_info *e)
+{
+ if (e->pid < 0)
+ return (-1);
+ if (kill (e->pid, SIGKILL) < 0)
+ return (-1);
+ e->pid = -1;
+ return (0);
+}
+
+/*
+ * Send all children in exec_wait_list SIGKILL.
+ * Returns 0 for success or < 0 on failure.
+ */
+static int exec_wait_kill_children (List exec_wait_list)
+{
+ int rc = 0;
+ int count;
+ struct exec_wait_info *e;
+ ListIterator i;
+
+ if ((count = list_count (exec_wait_list)) == 0)
+ return (0);
+
+ verbose ("Killing %d remaining child%s",
+ count, (count > 1 ? "ren" : ""));
+
+ i = list_iterator_create (exec_wait_list);
+ if (i == NULL)
+ return error ("exec_wait_kill_children: iterator_create: %m");
+
+ while ((e = list_next (i)))
+ rc += exec_wait_kill_child (e);
+ list_iterator_destroy (i);
+ return (rc);
+}
+
static void prepare_stdio (slurmd_job_t *job, slurmd_task_info_t *task)
{
#ifdef HAVE_PTY_H
@@ -1225,7 +1289,7 @@
/* fork and exec N tasks
*/
static int
-_fork_all_tasks(slurmd_job_t *job)
+_fork_all_tasks(slurmd_job_t *job, bool *io_initialized)
{
int rc = SLURM_SUCCESS;
int i;
@@ -1236,19 +1300,6 @@
xassert(job != NULL);
- if ((job->cont_id == 0) &&
- (slurm_container_create(job) != SLURM_SUCCESS)) {
- error("slurm_container_create: %m");
- return SLURM_ERROR;
- }
-
- debug2("Before call to spank_init()");
- if (spank_init (job) < 0) {
- error ("Plugin stack initialization failed.");
- return SLURM_ERROR;
- }
- debug2("After call to spank_init()");
-
set_oom_adj(0); /* the tasks may be killed by OOM */
if (pre_setuid(job)) {
error("Failed task affinity setup");
@@ -1264,11 +1315,43 @@
if (pam_setup(job->pwd->pw_name, conf->hostname)
!= SLURM_SUCCESS){
error ("error in pam_setup");
- goto fail1;
+ rc = SLURM_ERROR;
}
- if (seteuid (job->pwd->pw_uid) < 0) {
- error ("seteuid: %m");
+ /*
+ * Reclaim privileges to do the io setup
+ */
+ _reclaim_privileges (&sprivs);
+ if (rc)
+ goto fail1; /* pam_setup error */
+
+ set_umask(job); /* set umask for stdout/err files */
+ if (job->user_managed_io)
+ rc = _setup_user_managed_io(job);
+ else
+ rc = _setup_normal_io(job);
+ /*
+ * Initialize log facility to copy errors back to srun
+ */
+ if (!rc)
+ rc = _slurmd_job_log_init(job);
+
+ if (rc) {
+ error("IO setup failed: %m");
+ job->task[0]->estatus = 0x0100;
+ step_complete.step_rc = 0x0100;
+ rc = SLURM_SUCCESS; /* drains node otherwise */
+ goto fail1;
+ } else {
+ *io_initialized = true;
+ }
+
+ /*
+ * Temporarily drop effective privileges
+ */
+ if (_drop_privileges (job, true, &sprivs) < 0) {
+ error ("_drop_privileges: %m");
+ rc = SLURM_ERROR;
goto fail2;
}
@@ -1277,18 +1360,23 @@
job->cwd);
if (chdir("/tmp") < 0) {
error("couldn't chdir to /tmp either. dying.");
- goto fail2;
+ rc = SLURM_ERROR;
+ goto fail3;
}
}
if (spank_user (job) < 0) {
error("spank_user failed.");
- return SLURM_ERROR;
+ rc = SLURM_ERROR;
+ goto fail4;
}
exec_wait_list = list_create ((ListDelF) exec_wait_info_destroy);
- if (!exec_wait_list)
- return error ("Unable to create exec_wait_list");
+ if (!exec_wait_list) {
+ error ("Unable to create exec_wait_list");
+ rc = SLURM_ERROR;
+ goto fail4;
+ }
/*
* Fork all of the task processes.
@@ -1300,7 +1388,9 @@
if ((ei = fork_child_with_wait_info (i)) == NULL) {
error("child fork: %m");
- goto fail2;
+ exec_wait_kill_children (exec_wait_list);
+ rc = SLURM_ERROR;
+ goto fail4;
} else if ((pid = exec_wait_get_pid (ei)) == 0) { /* child */
/*
* Destroy exec_wait_list in the child.
@@ -1350,7 +1440,8 @@
* children in any process groups or containers
* before they make a call to exec(2).
*/
- exec_wait_child_wait_for_parent (ei);
+ if (exec_wait_child_wait_for_parent (ei) < 0)
+ exit (1);
exec_task(job, i);
}
@@ -1413,16 +1504,19 @@
if (slurm_container_add(job, job->task[i]->pid)
== SLURM_ERROR) {
error("slurm_container_add: %m");
- goto fail1;
+ rc = SLURM_ERROR;
+ goto fail2;
}
jobacct_id.nodeid = job->nodeid;
jobacct_id.taskid = job->task[i]->gtid;
+ jobacct_id.job = job;
jobacct_gather_g_add_task(job->task[i]->pid,
&jobacct_id);
if (spank_task_post_fork (job, i) < 0) {
error ("spank task %d post-fork failed", i);
- goto fail1;
+ rc = SLURM_ERROR;
+ goto fail2;
}
}
jobacct_gather_g_set_proctrack_container_id(job->cont_id);
@@ -1445,16 +1539,21 @@
return rc;
-fail2:
+fail4:
+ if (chdir (sprivs.saved_cwd) < 0) {
+ error ("Unable to return to working directory");
+ }
+fail3:
_reclaim_privileges (&sprivs);
if (exec_wait_list)
list_destroy (exec_wait_list);
+fail2:
+ io_close_task_fds(job);
fail1:
pam_finish();
- return SLURM_ERROR;
+ return rc;
}
-
/*
* Loop once through tasks looking for all tasks that have exited with
* the same exit status (and whose statuses have not been sent back to
@@ -1567,7 +1666,7 @@
/************* acct stuff ********************/
jobacct = jobacct_gather_g_remove_task(pid);
- if(jobacct) {
+ if (jobacct) {
jobacct_gather_g_setinfo(jobacct,
JOBACCT_DATA_RUSAGE, &rusage);
jobacct_gather_g_aggregate(job->jobacct, jobacct);
@@ -1606,10 +1705,10 @@
}
job->envtp->procid = t->id;
- if (spank_task_exit (job, t->id) < 0)
+ if (spank_task_exit (job, t->id) < 0) {
error ("Unable to spank task %d at exit",
t->id);
-
+ }
post_term(job);
}
@@ -1658,8 +1757,11 @@
static void *_kill_thr(void *args)
{
kill_thread_t *kt = ( kill_thread_t *) args;
- sleep(kt->secs);
- pthread_kill(kt->thread_id, SIGKILL);
+ unsigned int pause = kt->secs;
+ do {
+ pause = sleep(pause);
+ } while (pause > 0);
+ pthread_cancel(kt->thread_id);
xfree(kt);
return NULL;
}
diff --git a/src/slurmd/slurmstepd/multi_prog.c b/src/slurmd/slurmstepd/multi_prog.c
index 61e538d..54be8ec 100644
--- a/src/slurmd/slurmstepd/multi_prog.c
+++ b/src/slurmd/slurmstepd/multi_prog.c
@@ -56,6 +56,8 @@
#include "src/common/xstring.h"
#include "multi_prog.h"
+#define MAX_ARGC 128
+
/*
* Test if the specified rank is included in the supplied task range
* IN rank - this task's rank
@@ -139,12 +141,12 @@
* to retrieve the argv arrays for each task on this node, rather
* than calling multi_prog_get_argv once for each task.
*/
-extern int
-multi_prog_get_argv(char *file_contents, char **prog_env, int task_rank,
- uint32_t *argc, char ***argv)
+extern int multi_prog_get_argv(char *config_data, char **prog_env,
+ int task_rank, uint32_t *argc, char ***argv,
+ int global_argc, char **global_argv)
{
char *line = NULL;
- int line_num = 0;
+ int i, line_num = 0;
int task_offset;
char *p = NULL, *s = NULL, *ptrptr = NULL;
char *rank_spec = NULL, *args_spec = NULL;
@@ -152,7 +154,7 @@
char **prog_argv = NULL;
char *local_data = NULL;
- prog_argv = (char **)xmalloc(sizeof(char *) * 128);
+ prog_argv = (char **)xmalloc(sizeof(char *) * MAX_ARGC);
if (task_rank < 0) {
error("Invalid task rank %d", task_rank);
@@ -161,7 +163,7 @@
return -1;
}
- local_data = xstrdup(file_contents);
+ local_data = xstrdup(config_data);
line = strtok_r(local_data, "\n", &ptrptr);
while (line) {
@@ -209,6 +211,10 @@
while (*args_spec != '\0') {
/* Only simple quote and escape supported */
prog_argv[prog_argc ++] = args_spec;
+ if ((prog_argc + 1) >= MAX_ARGC) {
+ info("Exceeded multi-prog argc limit");
+ break;
+ }
CONT: while (*args_spec != '\0' && *args_spec != '\\'
&& *args_spec != '%'
&& *args_spec != '\'' && !isspace (*args_spec)) {
@@ -265,6 +271,13 @@
}
+ for (i = 2; i < global_argc; i++) {
+ if ((prog_argc + 1) >= MAX_ARGC) {
+ info("Exceeded multi-prog argc limit");
+ break;
+ }
+ prog_argv[prog_argc++] = xstrdup(global_argv[i]);
+ }
prog_argv[prog_argc] = NULL;
*argc = prog_argc;
diff --git a/src/slurmd/slurmstepd/multi_prog.h b/src/slurmd/slurmstepd/multi_prog.h
index da7a046..71c2b26 100644
--- a/src/slurmd/slurmstepd/multi_prog.h
+++ b/src/slurmd/slurmstepd/multi_prog.h
@@ -47,5 +47,6 @@
* "task_rank" is the task's GLOBAL rank within the job step.
*/
extern int multi_prog_get_argv(char *config_data, char **prog_env,
- int task_rank, uint32_t *argc, char ***argv);
+ int task_rank, uint32_t *argc, char ***argv,
+ int global_argc, char **global_argv);
#endif /* !_SLURMD_MULTI_PROG_H */
diff --git a/src/slurmd/slurmstepd/pam_ses.c b/src/slurmd/slurmstepd/pam_ses.c
index 57d4f43..93afa8f 100644
--- a/src/slurmd/slurmstepd/pam_ses.c
+++ b/src/slurmd/slurmstepd/pam_ses.c
@@ -90,7 +90,7 @@
* handling resource limits. When a PAM session is opened on behalf of
* a user, the limits imposed by the sys admin are picked up. Opening
* a PAM session requires a PAM handle, which is obtained when the PAM
- * interface is intialized. (PAM handles are required with essentially
+ * interface is initialized. (PAM handles are required with essentially
* all PAM calls.) It's also necessary to have the users PAM credentials
* to open a user session.
*/
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index 478279d..3f3e5e2 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -83,7 +83,8 @@
static int _handle_suspend(int fd, slurmd_job_t *job, uid_t uid);
static int _handle_resume(int fd, slurmd_job_t *job, uid_t uid);
static int _handle_terminate(int fd, slurmd_job_t *job, uid_t uid);
-static int _handle_completion(int fd, slurmd_job_t *job, uid_t uid);
+static int _handle_completion(int fd, slurmd_job_t *job, uid_t uid,
+ int protocol);
static int _handle_stat_jobacct(int fd, slurmd_job_t *job, uid_t uid);
static int _handle_task_info(int fd, slurmd_job_t *job);
static int _handle_list_pids(int fd, slurmd_job_t *job);
@@ -457,7 +458,7 @@
if (rc == 0) { /* EOF, normal */
return -1;
} else {
- debug3("Leaving _handle_request on read error");
+ debug3("Leaving _handle_request on read error: %m");
return SLURM_FAILURE;
}
}
@@ -517,7 +518,11 @@
break;
case REQUEST_STEP_COMPLETION:
debug("Handling REQUEST_STEP_COMPLETION");
- rc = _handle_completion(fd, job, uid);
+ rc = _handle_completion(fd, job, uid, 1);
+ break;
+ case REQUEST_STEP_COMPLETION_V2:
+ debug("Handling REQUEST_STEP_COMPLETION_V2");
+ rc = _handle_completion(fd, job, uid, 2);
break;
case REQUEST_STEP_TASK_INFO:
debug("Handling REQUEST_STEP_TASK_INFO");
@@ -1273,7 +1278,7 @@
}
static int
-_handle_completion(int fd, slurmd_job_t *job, uid_t uid)
+_handle_completion(int fd, slurmd_job_t *job, uid_t uid, int protocol)
{
int rc = SLURM_SUCCESS;
int errnum = 0;
@@ -1281,6 +1286,11 @@
int last;
jobacctinfo_t *jobacct = NULL;
int step_rc;
+ char* buf;
+ int len;
+ Buf buffer;
+ int version; /* For future use */
+ bool lock_set = false;
debug("_handle_completion for job %u.%u",
job->jobid, job->stepid);
@@ -1297,16 +1307,38 @@
return SLURM_SUCCESS;
}
+ if (protocol >= 2)
+ safe_read(fd, &version, sizeof(int));
safe_read(fd, &first, sizeof(int));
safe_read(fd, &last, sizeof(int));
safe_read(fd, &step_rc, sizeof(int));
- jobacct = jobacct_gather_g_create(NULL);
- jobacct_gather_g_getinfo(jobacct, JOBACCT_DATA_PIPE, &fd);
+ if (protocol >= 2) {
+	/*
+	 * We must not use getinfo over a pipe with slurmd here.
+	 * Indeed, slurmstepd makes heavy use of setinfo over a pipe
+	 * with slurmd, and doing the reverse can result in a deadlock
+	 * scenario with slurmd:
+	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
+	 * Do pack/unpack instead to ensure the independence of
+	 * slurmd and slurmstepd.
+	 */
+ safe_read(fd, &len, sizeof(int));
+ buf = xmalloc(len);
+ safe_read(fd, buf, len);
+ buffer = create_buf(buf, len);
+ jobacct_gather_g_unpack(&jobacct, SLURM_PROTOCOL_VERSION,
+ buffer);
+ free_buf(buffer);
+ } else {
+ jobacct = jobacct_gather_g_create(NULL);
+ jobacct_gather_g_getinfo(jobacct, JOBACCT_DATA_PIPE, &fd);
+ }
/*
* Record the completed nodes
*/
pthread_mutex_lock(&step_complete.lock);
+ lock_set = true;
if (! step_complete.wait_children) {
rc = -1;
errnum = ETIMEDOUT; /* not used anyway */
@@ -1350,7 +1382,12 @@
pthread_mutex_unlock(&step_complete.lock);
return SLURM_SUCCESS;
-rwfail:
+
+
+rwfail: if (lock_set) {
+ pthread_cond_signal(&step_complete.cond);
+ pthread_mutex_unlock(&step_complete.lock);
+ }
return SLURM_FAILURE;
}
diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c
index d27e858..e8935ed 100644
--- a/src/slurmd/slurmstepd/slurmstepd.c
+++ b/src/slurmd/slurmstepd/slurmstepd.c
@@ -54,6 +54,7 @@
#include "src/common/switch.h"
#include "src/common/xmalloc.h"
#include "src/common/xsignal.h"
+#include "src/common/plugstack.h"
#include "src/slurmd/common/slurmstepd_init.h"
#include "src/slurmd/common/setproctitle.h"
@@ -76,6 +77,7 @@
#ifdef MEMORY_LEAK_DEBUG
static void _step_cleanup(slurmd_job_t *job, slurm_msg_t *msg, int rc);
#endif
+static int process_cmdline (int argc, char *argv[]);
int slurmstepd_blocked_signals[] = {
SIGPIPE, 0
@@ -96,11 +98,9 @@
gid_t *gids;
int rc = 0;
- if ((argc == 2) && (strcmp(argv[1], "getenv") == 0)) {
- print_rlimits();
- _dump_user_env();
- exit(0);
- }
+ if (process_cmdline (argc, argv) < 0)
+ fatal ("Error in slurmstepd command line");
+
xsignal_block(slurmstepd_blocked_signals);
conf = xmalloc(sizeof(*conf));
conf->argv = &argv;
@@ -165,6 +165,8 @@
xfree(cli);
xfree(self);
xfree(conf->hostname);
+ xfree(conf->block_map);
+ xfree(conf->block_map_inv);
xfree(conf->spooldir);
xfree(conf->node_name);
xfree(conf->node_topo_addr);
@@ -176,6 +178,147 @@
return rc;
}
+
+static slurmd_conf_t * read_slurmd_conf_lite (int fd)
+{
+ int rc;
+ int len;
+ Buf buffer;
+ slurmd_conf_t *confl;
+
+ /* First check to see if we've already initialized the
+ * global slurmd_conf_t in 'conf'. Allocate memory if not.
+ */
+ confl = conf ? conf : xmalloc (sizeof (*confl));
+
+ safe_read(fd, &len, sizeof(int));
+
+ buffer = init_buf(len);
+ safe_read(fd, buffer->head, len);
+
+ rc = unpack_slurmd_conf_lite_no_alloc(confl, buffer);
+ if (rc == SLURM_ERROR)
+ fatal("slurmstepd: problem with unpack of slurmd_conf");
+
+ free_buf(buffer);
+
+ confl->log_opts.stderr_level = confl->debug_level;
+ confl->log_opts.logfile_level = confl->debug_level;
+ confl->log_opts.syslog_level = confl->debug_level;
+ /*
+ * If daemonizing, turn off stderr logging -- also, if
+ * logging to a file, turn off syslog.
+ *
+ * Otherwise, if remaining in foreground, turn off logging
+ * to syslog (but keep logfile level)
+ */
+ if (confl->daemonize) {
+ confl->log_opts.stderr_level = LOG_LEVEL_QUIET;
+ if (confl->logfile)
+ confl->log_opts.syslog_level = LOG_LEVEL_QUIET;
+ } else
+ confl->log_opts.syslog_level = LOG_LEVEL_QUIET;
+
+ return (confl);
+rwfail:
+ return (NULL);
+}
+
+static int get_jobid_uid_from_env (uint32_t *jobidp, uid_t *uidp)
+{
+ const char *val;
+ char *p;
+
+ if (!(val = getenv ("SLURM_JOBID")))
+ return error ("Unable to get SLURM_JOBID in env!");
+
+ *jobidp = (uint32_t) strtoul (val, &p, 10);
+ if (*p != '\0')
+ return error ("Invalid SLURM_JOBID=%s", val);
+
+ if (!(val = getenv ("SLURM_UID")))
+ return error ("Unable to get SLURM_UID in env!");
+
+ *uidp = (uid_t) strtoul (val, &p, 10);
+ if (*p != '\0')
+ return error ("Invalid SLURM_UID=%s", val);
+
+ return (0);
+}
+
+static int handle_spank_mode (int argc, char *argv[])
+{
+ char prefix[64] = "spank-";
+ const char *mode = argv[2];
+ uid_t uid = (uid_t) -1;
+ uint32_t jobid = (uint32_t) -1;
+ log_options_t lopts = LOG_OPTS_INITIALIZER;
+
+ /*
+ * Not necessary to log to syslog
+ */
+ lopts.syslog_level = LOG_LEVEL_QUIET;
+
+ /*
+ * Make our log prefix into spank-prolog: or spank-epilog:
+ */
+ strcat (prefix, mode);
+ log_init(prefix, lopts, LOG_DAEMON, NULL);
+
+ /*
+ * When we are started from slurmd, a lightweight config is
+ * sent over the stdin fd. If we are able to read this conf
+ * use it to reinitialize the log.
+ * It is not a fatal error if we fail to read the conf file.
+ * This could happen if slurmstepd is run standalone for
+ * testing.
+ */
+ if ((conf = read_slurmd_conf_lite (STDIN_FILENO)))
+ log_alter (conf->log_opts, 0, conf->logfile);
+ close (STDIN_FILENO);
+
+ if (slurm_conf_init(NULL) != SLURM_SUCCESS)
+ return error ("Failed to read slurm config");
+
+ if (get_jobid_uid_from_env (&jobid, &uid) < 0)
+ return error ("spank environment invalid");
+
+ debug("Running spank/%s for jobid [%u] uid [%u]", mode, jobid, uid);
+
+ if (strcmp (mode, "prolog") == 0) {
+ if (spank_job_prolog (jobid, uid) < 0)
+ return (-1);
+ }
+ else if (strcmp (mode, "epilog") == 0) {
+ if (spank_job_epilog (jobid, uid) < 0)
+ return (-1);
+ }
+ else {
+ error ("Invalid mode %s specified!", mode);
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * Process special "modes" of slurmstepd passed as cmdline arguments.
+ */
+static int process_cmdline (int argc, char *argv[])
+{
+ if ((argc == 2) && (strcmp(argv[1], "getenv") == 0)) {
+ print_rlimits();
+ _dump_user_env();
+ exit(0);
+ }
+ if ((argc == 3) && (strcmp(argv[1], "spank") == 0)) {
+ if (handle_spank_mode (argc, argv) < 0)
+ exit (1);
+ exit (0);
+ }
+ return (0);
+}
+
+
static void
_send_ok_to_slurmd(int sock)
{
@@ -241,32 +384,8 @@
pthread_mutex_unlock(&step_complete.lock);
/* receive conf from slurmd */
- safe_read(sock, &len, sizeof(int));
- incoming_buffer = xmalloc(len);
- safe_read(sock, incoming_buffer, len);
- buffer = create_buf(incoming_buffer,len);
- if(unpack_slurmd_conf_lite_no_alloc(conf, buffer) == SLURM_ERROR) {
- fatal("slurmstepd: problem with unpack of slurmd_conf");
- }
- free_buf(buffer);
-
- conf->log_opts.stderr_level = conf->debug_level;
- conf->log_opts.logfile_level = conf->debug_level;
- conf->log_opts.syslog_level = conf->debug_level;
-
- /*
- * If daemonizing, turn off stderr logging -- also, if
- * logging to a file, turn off syslog.
- *
- * Otherwise, if remaining in foreground, turn off logging
- * to syslog (but keep logfile level)
- */
- if (conf->daemonize) {
- conf->log_opts.stderr_level = LOG_LEVEL_QUIET;
- if (conf->logfile)
- conf->log_opts.syslog_level = LOG_LEVEL_QUIET;
- } else
- conf->log_opts.syslog_level = LOG_LEVEL_QUIET;
+ if ((conf = read_slurmd_conf_lite (sock)) == NULL)
+ fatal("Failed to read conf from slurmd");
log_alter(conf->log_opts, 0, conf->logfile);
debug2("debug level is %d.", conf->debug_level);
@@ -285,27 +404,29 @@
safe_read(sock, incoming_buffer, len);
buffer = create_buf(incoming_buffer,len);
cli = xmalloc(sizeof(slurm_addr_t));
- if(slurm_unpack_slurm_addr_no_alloc(cli, buffer) == SLURM_ERROR) {
+ if(slurm_unpack_slurm_addr_no_alloc(cli, buffer) == SLURM_ERROR)
fatal("slurmstepd: problem with unpack of slurmd_conf");
- }
free_buf(buffer);
/* receive self from slurmd */
safe_read(sock, &len, sizeof(int));
- if(len > 0) {
+ if (len > 0) {
/* receive packed self from main slurmd */
incoming_buffer = xmalloc(sizeof(char) * len);
safe_read(sock, incoming_buffer, len);
buffer = create_buf(incoming_buffer,len);
self = xmalloc(sizeof(slurm_addr_t));
- if(slurm_unpack_slurm_addr_no_alloc(self, buffer)
- == SLURM_ERROR) {
+ if (slurm_unpack_slurm_addr_no_alloc(self, buffer)
+ == SLURM_ERROR) {
fatal("slurmstepd: problem with unpack of "
"slurmd_conf");
}
free_buf(buffer);
}
+ /* Receive GRES information from slurmd */
+ gres_plugin_recv_stepd(sock);
+
/* receive req from slurmd */
safe_read(sock, &len, sizeof(int));
incoming_buffer = xmalloc(sizeof(char) * len);
@@ -314,6 +435,7 @@
msg = xmalloc(sizeof(slurm_msg_t));
slurm_msg_t_init(msg);
+ msg->protocol_version = SLURM_PROTOCOL_VERSION;
switch(step_type) {
case LAUNCH_BATCH_JOB:
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c
index 5b8d595..7855d1f 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.c
+++ b/src/slurmd/slurmstepd/slurmstepd_job.c
@@ -369,7 +369,8 @@
job->state = SLURMSTEPD_STEP_STARTING;
job->pwd = pwd;
- job->cpus = msg->cpus_per_node[0];
+ if (msg->cpus_per_node)
+ job->cpus = msg->cpus_per_node[0];
job->node_tasks = 1;
job->ntasks = msg->ntasks;
job->jobid = msg->job_id;
@@ -408,7 +409,8 @@
job->envtp->ckpt_dir = NULL;
job->envtp->restart_cnt = msg->restart_cnt;
- job->cpus_per_task = msg->cpus_per_node[0];
+ if (msg->cpus_per_node)
+ job->cpus = msg->cpus_per_node[0];
format_core_allocs(msg->cred, conf->node_name,
&job->job_alloc_cores, &job->step_alloc_cores,
&job->job_mem, &job->step_mem);
@@ -520,7 +522,8 @@
if (job->multi_prog) {
multi_prog_get_argv(job->argv[1], job->env, gtid[i],
&job->task[i]->argc,
- &job->task[i]->argv);
+ &job->task[i]->argv,
+ job->argc, job->argv);
} else {
job->task[i]->argc = job->argc;
job->task[i]->argv = job->argv;
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h
index 5dd5a7a..32690d2 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.h
+++ b/src/slurmd/slurmstepd/slurmstepd_job.h
@@ -232,7 +232,7 @@
static inline slurmd_task_info_t *
job_task_info_by_pid (slurmd_job_t *job, pid_t pid)
{
- int i;
+ uint32_t i;
for (i = 0; i < job->node_tasks; i++) {
if (job->task[i]->pid == pid)
return (job->task[i]);
diff --git a/src/slurmd/slurmstepd/step_terminate_monitor.c b/src/slurmd/slurmstepd/step_terminate_monitor.c
index 56b2254..edff74b 100644
--- a/src/slurmd/slurmstepd/step_terminate_monitor.c
+++ b/src/slurmd/slurmstepd/step_terminate_monitor.c
@@ -35,6 +35,10 @@
#include "src/common/read_config.h"
#include "src/slurmd/slurmstepd/step_terminate_monitor.h"
+#if defined(__NetBSD__)
+#include <sys/types.h> /* for pid_t */
+#include <sys/signal.h> /* for SIGKILL */
+#endif
#include <stdlib.h>
#include <sys/wait.h>
#include <sys/errno.h>
diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c
index 0137fdc..9f7d4bc 100644
--- a/src/slurmd/slurmstepd/task.c
+++ b/src/slurmd/slurmstepd/task.c
@@ -41,6 +41,10 @@
# include "config.h"
#endif
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
@@ -480,16 +484,30 @@
if (!(tmpdir = getenvp(job->env, "TMPDIR")))
setenvf(&job->env, "TMPDIR", "/tmp"); /* task may want it set */
else if (mkdir(tmpdir, 0700) < 0) {
- if (errno == EEXIST) {
- struct stat st;
+ struct stat st;
+ int mkdir_errno = errno;
- if (stat(tmpdir, &st) == 0 && /* does user have access? */
- S_ISDIR(st.st_mode) && /* is it a directory? */
- ((st.st_mode & S_IWOTH) || /* can user write there? */
- (st.st_uid == job->uid && (st.st_mode & S_IWUSR))))
- return;
+ if (stat(tmpdir, &st)) { /* does the file exist ? */
+ /* show why we were not able to create it */
+ error("Unable to create TMPDIR [%s]: %s",
+ tmpdir, strerror(mkdir_errno));
+ } else if (!S_ISDIR(st.st_mode)) { /* is it a directory? */
+ error("TMPDIR [%s] is not a directory", tmpdir);
}
- error("Unable to create TMPDIR [%s]: %m", tmpdir);
+
+	/* eaccess() wasn't introduced until glibc 2.4, but euidaccess()
+	 * has been around for a while. So, to make sure we
+	 * still work with older systems, we include this check.
+	 */
+#if defined __GLIBC__ && __GLIBC_PREREQ(2, 4)
+ else if (eaccess(tmpdir, X_OK|W_OK)) /* check permissions */
+#else
+ else if (euidaccess(tmpdir, X_OK|W_OK))
+#endif
+ error("TMPDIR [%s] is not writeable", tmpdir);
+ else
+ return;
+
error("Setting TMPDIR to /tmp");
setenvf(&job->env, "TMPDIR", "/tmp");
}
diff --git a/src/slurmdbd/Makefile.in b/src/slurmdbd/Makefile.in
index 10546e2..da0dcb8 100644
--- a/src/slurmdbd/Makefile.in
+++ b/src/slurmdbd/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -193,6 +193,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -229,6 +230,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -415,7 +417,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-slurmdbd$(EXEEXT): $(slurmdbd_OBJECTS) $(slurmdbd_DEPENDENCIES)
+slurmdbd$(EXEEXT): $(slurmdbd_OBJECTS) $(slurmdbd_DEPENDENCIES) $(EXTRA_slurmdbd_DEPENDENCIES)
@rm -f slurmdbd$(EXEEXT)
$(slurmdbd_LINK) $(slurmdbd_OBJECTS) $(slurmdbd_LDADD) $(LIBS)
@@ -558,10 +560,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c
index e374d82..db797e0 100644
--- a/src/slurmdbd/proc_req.c
+++ b/src/slurmdbd/proc_req.c
@@ -1150,21 +1150,56 @@
return rc;
}
+static int _unpack_config_name(char **object, Buf buffer)
+{
+ char *config_name;
+ uint32_t uint32_tmp;
+
+ safe_unpackstr_xmalloc(&config_name, &uint32_tmp, buffer);
+ *object = config_name;
+ return SLURM_SUCCESS;
+
+unpack_error:
+ *object = NULL;
+ return SLURM_ERROR;
+}
+
static int _get_config(slurmdbd_conn_t *slurmdbd_conn,
Buf in_buffer, Buf *out_buffer, uint32_t *uid)
{
+ char *config_name = NULL;
dbd_list_msg_t list_msg = { NULL };
debug2("DBD_GET_CONFIG: called");
- /* No message body to unpack */
+ if (slurmdbd_conn->rpc_version >= 10 &&
+ _unpack_config_name(&config_name, in_buffer) != SLURM_SUCCESS) {
+ char *comment = "Failed to unpack DBD_GET_CONFIG message";
+ error("CONN:%u %s", slurmdbd_conn->newsockfd, comment);
+ *out_buffer = make_dbd_rc_msg(slurmdbd_conn->rpc_version,
+ SLURM_ERROR, comment,
+ DBD_GET_CONFIG);
+ return SLURM_ERROR;
+ }
- list_msg.my_list = dump_config();
+ if (config_name == NULL ||
+ strcmp(config_name, "slurmdbd.conf") == 0)
+ list_msg.my_list = dump_config();
+ else if ((list_msg.my_list = acct_storage_g_get_config(
+ slurmdbd_conn->db_conn, config_name)) == NULL) {
+ *out_buffer = make_dbd_rc_msg(slurmdbd_conn->rpc_version,
+ errno, slurm_strerror(errno),
+ DBD_GET_CONFIG);
+ xfree(config_name);
+ return SLURM_ERROR;
+ }
+
*out_buffer = init_buf(1024);
pack16((uint16_t) DBD_GOT_CONFIG, *out_buffer);
slurmdbd_pack_list_msg(&list_msg, slurmdbd_conn->rpc_version,
DBD_GOT_CONFIG, *out_buffer);
if (list_msg.my_list)
list_destroy(list_msg.my_list);
+ xfree(config_name);
return SLURM_SUCCESS;
}
diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c
index b46baca..df3f73b 100644
--- a/src/slurmdbd/read_config.c
+++ b/src/slurmdbd/read_config.c
@@ -138,7 +138,7 @@
{"DbdBackupHost", S_P_STRING},
{"DbdHost", S_P_STRING},
{"DbdPort", S_P_UINT16},
- {"DebugLevel", S_P_UINT16},
+ {"DebugLevel", S_P_STRING},
{"DefaultQOS", S_P_STRING},
{"JobPurge", S_P_UINT32},
{"LogFile", S_P_STRING},
@@ -212,7 +212,14 @@
s_p_get_string(&slurmdbd_conf->dbd_host, "DbdHost", tbl);
s_p_get_string(&slurmdbd_conf->dbd_addr, "DbdAddr", tbl);
s_p_get_uint16(&slurmdbd_conf->dbd_port, "DbdPort", tbl);
- s_p_get_uint16(&slurmdbd_conf->debug_level, "DebugLevel", tbl);
+
+ if (s_p_get_string(&temp_str, "DebugLevel", tbl)) {
+ slurmdbd_conf->debug_level = log_string2num(temp_str);
+ if (slurmdbd_conf->debug_level == (uint16_t) NO_VAL)
+ fatal("Invalid DebugLevel %s", temp_str);
+ xfree(temp_str);
+ }
+
s_p_get_string(&slurmdbd_conf->default_qos, "DefaultQOS", tbl);
if (s_p_get_uint32(&slurmdbd_conf->purge_job,
"JobPurge", tbl)) {
@@ -684,7 +691,8 @@
key_pair = xmalloc(sizeof(config_key_pair_t));
key_pair->name = xstrdup("DebugLevel");
key_pair->value = xmalloc(32);
- snprintf(key_pair->value, 32, "%u", slurmdbd_conf->debug_level);
+ snprintf(key_pair->value, 32, "%s",
+ log_num2string(slurmdbd_conf->debug_level));
list_append(my_list, key_pair);
key_pair = xmalloc(sizeof(config_key_pair_t));
diff --git a/src/slurmdbd/slurmdbd.c b/src/slurmdbd/slurmdbd.c
index dc38fef..5513f01 100644
--- a/src/slurmdbd/slurmdbd.c
+++ b/src/slurmdbd/slurmdbd.c
@@ -40,6 +40,9 @@
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
+#if HAVE_SYS_PRCTL_H
+# include <sys/prctl.h>
+#endif
#include <grp.h>
#include <pthread.h>
#include <signal.h>
@@ -96,9 +99,10 @@
static void _request_registrations(void *db_conn);
static void _rollup_handler_cancel();
static void *_rollup_handler(void *no_data);
-static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec);
+static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec);
+static void _set_work_dir(void);
static void *_signal_handler(void *no_data);
-static void _update_logging(void);
+static void _update_logging(bool startup);
static void _update_nice(void);
static void _usage(char *prog_name);
@@ -115,7 +119,7 @@
if (read_slurmdbd_conf())
exit(1);
_parse_commandline(argc, argv);
- _update_logging();
+ _update_logging(true);
_update_nice();
if (slurm_auth_init(NULL) != SLURM_SUCCESS) {
@@ -140,6 +144,8 @@
*/
_init_pidfile();
_become_slurm_user();
+ if (foreground == 0)
+ _set_work_dir();
log_config();
#ifdef PR_SET_DUMPABLE
@@ -357,7 +363,7 @@
}
/* Reset slurmctld logging based upon configuration parameters */
-static void _update_logging(void)
+static void _update_logging(bool startup)
{
/* Preserve execute line arguments (if any) */
if (debug_level) {
@@ -379,6 +385,19 @@
}
log_alter(log_opts, SYSLOG_FACILITY_DAEMON, slurmdbd_conf->log_file);
+ if (startup && slurmdbd_conf->log_file) {
+ int rc;
+ gid_t slurm_user_gid;
+ slurm_user_gid = gid_from_uid(slurmdbd_conf->slurm_user_id);
+ rc = chown(slurmdbd_conf->log_file,
+ slurmdbd_conf->slurm_user_id, slurm_user_gid);
+ if (rc) {
+ error("chown(%s, %d, %d): %m",
+ slurmdbd_conf->log_file,
+ (int) slurmdbd_conf->slurm_user_id,
+ (int) slurm_user_gid);
+ }
+ }
}
/* Reset slurmd nice value */
@@ -444,6 +463,11 @@
if (daemon(1, 1))
error("daemon(): %m");
log_alter(log_opts, LOG_DAEMON, slurmdbd_conf->log_file);
+}
+
+static void _set_work_dir(void)
+{
+ bool success = false;
if (slurmdbd_conf->log_file &&
(slurmdbd_conf->log_file[0] == '/')) {
@@ -454,10 +478,20 @@
work_dir[1] = '\0';
else
slash_ptr[0] = '\0';
- if (chdir(work_dir) < 0)
- fatal("chdir(%s): %m", work_dir);
+ if ((access(work_dir, W_OK) != 0) || (chdir(work_dir) < 0))
+ error("chdir(%s): %m", work_dir);
+ else
+ success = true;
xfree(work_dir);
}
+
+ if (!success) {
+ if ((access("/var/tmp", W_OK) != 0) ||
+ (chdir("/var/tmp") < 0)) {
+ error("chdir(/var/tmp): %m");
+ } else
+ info("chdir to /var/tmp");
+ }
}
static void _request_registrations(void *db_conn)
@@ -607,7 +641,7 @@
info("Reconfigure signal (SIGHUP) received");
read_slurmdbd_conf();
assoc_mgr_set_missing_uids();
- _update_logging();
+ _update_logging(false);
break;
case SIGINT: /* kill -2 or <CTRL-C> */
case SIGTERM: /* kill -15 */
diff --git a/src/smap/Makefile.am b/src/smap/Makefile.am
index e5f8571..67438cf 100644
--- a/src/smap/Makefile.am
+++ b/src/smap/Makefile.am
@@ -9,7 +9,7 @@
LIBS=$(NCURSES)
INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
-if HAVE_SOME_CURSES
+if BUILD_SMAP
bin_PROGRAMS = smap
diff --git a/src/smap/Makefile.in b/src/smap/Makefile.in
index 6dc74bd..054c8a3 100644
--- a/src/smap/Makefile.in
+++ b/src/smap/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -42,9 +42,9 @@
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
-@HAVE_SOME_CURSES_TRUE@bin_PROGRAMS = smap$(EXEEXT)
-@BLUEGENE_LOADED_TRUE@@HAVE_SOME_CURSES_TRUE@am__append_1 = $(bg_dir)/libconfigure_api.la
-@BLUEGENE_LOADED_TRUE@@HAVE_SOME_CURSES_TRUE@am__append_2 = configure_functions.c
+@BUILD_SMAP_TRUE@bin_PROGRAMS = smap$(EXEEXT)
+@BLUEGENE_LOADED_TRUE@@BUILD_SMAP_TRUE@am__append_1 = $(bg_dir)/libconfigure_api.la
+@BLUEGENE_LOADED_TRUE@@BUILD_SMAP_TRUE@am__append_2 = configure_functions.c
subdir = src/smap
DIST_COMMON = $(am__noinst_HEADERS_DIST) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in
@@ -98,21 +98,21 @@
am__smap_SOURCES_DIST = smap.c job_functions.c partition_functions.c \
grid_functions.c reservation_functions.c opts.c \
configure_functions.c
-@BLUEGENE_LOADED_TRUE@@HAVE_SOME_CURSES_TRUE@am__objects_1 = configure_functions.$(OBJEXT)
-@HAVE_SOME_CURSES_TRUE@am_smap_OBJECTS = smap.$(OBJEXT) \
-@HAVE_SOME_CURSES_TRUE@ job_functions.$(OBJEXT) \
-@HAVE_SOME_CURSES_TRUE@ partition_functions.$(OBJEXT) \
-@HAVE_SOME_CURSES_TRUE@ grid_functions.$(OBJEXT) \
-@HAVE_SOME_CURSES_TRUE@ reservation_functions.$(OBJEXT) \
-@HAVE_SOME_CURSES_TRUE@ opts.$(OBJEXT) $(am__objects_1)
+@BLUEGENE_LOADED_TRUE@@BUILD_SMAP_TRUE@am__objects_1 = configure_functions.$(OBJEXT)
+@BUILD_SMAP_TRUE@am_smap_OBJECTS = smap.$(OBJEXT) \
+@BUILD_SMAP_TRUE@ job_functions.$(OBJEXT) \
+@BUILD_SMAP_TRUE@ partition_functions.$(OBJEXT) \
+@BUILD_SMAP_TRUE@ grid_functions.$(OBJEXT) \
+@BUILD_SMAP_TRUE@ reservation_functions.$(OBJEXT) \
+@BUILD_SMAP_TRUE@ opts.$(OBJEXT) $(am__objects_1)
am__EXTRA_smap_SOURCES_DIST = smap.h smap.c job_functions.c \
partition_functions.c configure_functions.c grid_functions.c \
reservation_functions.c opts.c
smap_OBJECTS = $(am_smap_OBJECTS)
am__DEPENDENCIES_1 =
-@HAVE_SOME_CURSES_TRUE@smap_DEPENDENCIES = \
-@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurm.o \
-@HAVE_SOME_CURSES_TRUE@ $(am__DEPENDENCIES_1) $(am__append_1)
+@BUILD_SMAP_TRUE@smap_DEPENDENCIES = \
+@BUILD_SMAP_TRUE@ $(top_builddir)/src/api/libslurm.o \
+@BUILD_SMAP_TRUE@ $(am__DEPENDENCIES_1) $(am__append_1)
smap_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(smap_LDFLAGS) \
$(LDFLAGS) -o $@
@@ -213,6 +213,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -249,6 +250,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -338,20 +340,19 @@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = foreign
INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
-@HAVE_SOME_CURSES_TRUE@noinst_HEADERS = smap.h
-@HAVE_SOME_CURSES_TRUE@smap_SOURCES = smap.c job_functions.c \
-@HAVE_SOME_CURSES_TRUE@ partition_functions.c grid_functions.c \
-@HAVE_SOME_CURSES_TRUE@ reservation_functions.c opts.c \
-@HAVE_SOME_CURSES_TRUE@ $(am__append_2)
-@HAVE_SOME_CURSES_TRUE@smap_LDADD = \
-@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurm.o \
-@HAVE_SOME_CURSES_TRUE@ $(DL_LIBS) $(am__append_1)
-@BLUEGENE_LOADED_TRUE@@HAVE_SOME_CURSES_TRUE@bg_dir = $(top_builddir)/src/plugins/select/bluegene
-@HAVE_SOME_CURSES_TRUE@smap_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
-@HAVE_SOME_CURSES_FALSE@EXTRA_smap_SOURCES = smap.h smap.c \
-@HAVE_SOME_CURSES_FALSE@ job_functions.c partition_functions.c \
-@HAVE_SOME_CURSES_FALSE@ configure_functions.c grid_functions.c \
-@HAVE_SOME_CURSES_FALSE@ reservation_functions.c opts.c
+@BUILD_SMAP_TRUE@noinst_HEADERS = smap.h
+@BUILD_SMAP_TRUE@smap_SOURCES = smap.c job_functions.c \
+@BUILD_SMAP_TRUE@ partition_functions.c grid_functions.c \
+@BUILD_SMAP_TRUE@ reservation_functions.c opts.c \
+@BUILD_SMAP_TRUE@ $(am__append_2)
+@BUILD_SMAP_TRUE@smap_LDADD = $(top_builddir)/src/api/libslurm.o \
+@BUILD_SMAP_TRUE@ $(DL_LIBS) $(am__append_1)
+@BLUEGENE_LOADED_TRUE@@BUILD_SMAP_TRUE@bg_dir = $(top_builddir)/src/plugins/select/bluegene
+@BUILD_SMAP_TRUE@smap_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
+@BUILD_SMAP_FALSE@EXTRA_smap_SOURCES = smap.h smap.c \
+@BUILD_SMAP_FALSE@ job_functions.c partition_functions.c \
+@BUILD_SMAP_FALSE@ configure_functions.c grid_functions.c \
+@BUILD_SMAP_FALSE@ reservation_functions.c opts.c
all: all-am
@@ -430,7 +431,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-smap$(EXEEXT): $(smap_OBJECTS) $(smap_DEPENDENCIES)
+smap$(EXEEXT): $(smap_OBJECTS) $(smap_DEPENDENCIES) $(EXTRA_smap_DEPENDENCIES)
@rm -f smap$(EXEEXT)
$(smap_LINK) $(smap_OBJECTS) $(smap_LDADD) $(LIBS)
@@ -574,10 +575,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
@@ -676,9 +682,9 @@
uninstall-binPROGRAMS
-@HAVE_SOME_CURSES_TRUE@force:
-@HAVE_SOME_CURSES_TRUE@$(smap_LDADD) : force
-@HAVE_SOME_CURSES_TRUE@ @cd `dirname $@` && $(MAKE) `basename $@`
+@BUILD_SMAP_TRUE@force:
+@BUILD_SMAP_TRUE@$(smap_LDADD) : force
+@BUILD_SMAP_TRUE@ @cd `dirname $@` && $(MAKE) `basename $@`
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/src/smap/configure_functions.c b/src/smap/configure_functions.c
index 9f4cc3e..3ac4a2c 100644
--- a/src/smap/configure_functions.c
+++ b/src/smap/configure_functions.c
@@ -614,7 +614,7 @@
}
if (com[i] == 'r')
com[i] = 'R';
- ret_str = resolve_mp(com+i);
+ ret_str = resolve_mp(com+i, NULL);
if (ret_str) {
snprintf(error_string, sizeof(error_string), "%s", ret_str);
xfree(ret_str);
@@ -911,8 +911,8 @@
"RamDiskImage=%s/ramdisk.elf\n",
image_dir);
- xstrcat(save_string, "Numpsets=8 # io poor\n");
- xstrcat(save_string, "# Numpsets=64 # io rich\n");
+ xstrcat(save_string, "IONodesPerMP=8 # io poor\n");
+ xstrcat(save_string, "# IONodesPerMP=64 # io rich\n");
#elif defined HAVE_BGP
image_dir = "/bgsys/drivers/ppcfloor/boot";
xstrfmtcat(save_string, "CnloadImage=%s/cns,%s/cnk\n",
@@ -922,14 +922,14 @@
xstrfmtcat(save_string,
"IoloadImage=%s/cns,%s/linux,%s/ramdisk\n",
image_dir, image_dir, image_dir);
- xstrcat(save_string, "Numpsets=4 # io poor\n");
- xstrcat(save_string, "# Numpsets=32 # io rich\n");
+ xstrcat(save_string, "IONodesPerMP=4 # io poor\n");
+ xstrcat(save_string, "# IONodesPerMP=32 # io rich\n");
#else
image_dir = "/bgsys/drivers/ppcfloor/boot";
- xstrfmtcat(save_string, "MloaderImage=%s/uloader\n",
+ xstrfmtcat(save_string, "MloaderImage=%s/firmware\n",
image_dir);
- xstrcat(save_string, "Numpsets=4 # io semi-poor\n");
- xstrcat(save_string, "# Numpsets=16 # io rich\n");
+ xstrcat(save_string, "IONodesPerMP=4 # io semi-poor\n");
+ xstrcat(save_string, "# IONodesPerMP=16 # io rich\n");
#endif
xstrcat(save_string, "BridgeAPILogFile="
@@ -985,7 +985,7 @@
#endif
}
- xstrfmtcat(save_string, "BPs=%s", request->save_name);
+ xstrfmtcat(save_string, "MPs=%s", request->save_name);
for (i=0; i<SYSTEM_DIMENSIONS; i++) {
if (request->conn_type[i] == (uint16_t)NO_VAL)
@@ -1200,11 +1200,14 @@
if (strcasecmp(layout_mode, "DYNAMIC")) {
if (!s_p_get_array((void ***)&blockreq_array,
- &count, "BPs", tbl)) {
- memset(error_string, 0, 255);
- sprintf(error_string,
- "WARNING: no blocks defined in "
- "bluegene.conf");
+ &count, "MPs", tbl)) {
+ if (!s_p_get_array((void ***)&blockreq_array,
+ &count, "BPs", tbl)) {
+ memset(error_string, 0, 255);
+ sprintf(error_string,
+ "WARNING: no blocks defined in "
+ "bluegene.conf");
+ }
}
for (i = 0; i < count; i++) {
@@ -1237,7 +1240,7 @@
main_xcord += 7;
#ifdef HAVE_BG
mvwprintw(text_win, main_ycord,
- main_xcord, "BP_COUNT");
+ main_xcord, "MIDPLANES");
#else
mvwprintw(text_win, main_ycord,
main_xcord, "NODES");
@@ -1267,7 +1270,7 @@
#endif
#ifdef HAVE_BG
mvwprintw(text_win, main_ycord,
- main_xcord, "BP_LIST");
+ main_xcord, "MIDPLANELIST");
#else
mvwprintw(text_win, main_ycord,
main_xcord, "NODELIST");
@@ -1397,6 +1400,9 @@
xfree(cluster_name);
}
+ /* make sure we don't get any noisy debug */
+ ba_configure_set_ba_debug_flags(0);
+
bg_configure_ba_setup_wires();
color_count = 0;
@@ -1407,9 +1413,6 @@
snprintf(com, sizeof(com), "%s", params.command);
goto run_command;
} else {
- /* make sure we don't get any noisy debug */
- ba_configure_set_ba_debug_flags(0);
-
text_width = text_win->_maxx;
text_startx = text_win->_begx;
command_win = newwin(3, text_width - 1, LINES - 4,
diff --git a/src/smap/grid_functions.c b/src/smap/grid_functions.c
index 2f0fe3a..e02af52 100644
--- a/src/smap/grid_functions.c
+++ b/src/smap/grid_functions.c
@@ -164,7 +164,7 @@
}
/* Build the smap_system_ptr structure from the node records */
-extern void init_grid(node_info_msg_t *node_info_ptr)
+extern void init_grid(node_info_msg_t *node_info_ptr, int cols)
{
int i, j, len;
int default_y_offset = 0;
@@ -265,11 +265,13 @@
default_y_offset = (dim_size[3] * dim_size[2]) +
(dim_size[2] - dim_size[3]);
}
+ if (cols == 0)
+ cols = 80;
for (i = 0; i < smap_system_ptr->node_cnt; i++) {
smap_node = smap_system_ptr->grid[i];
if (params.cluster_dims == 1) {
- smap_node->grid_xcord = i + 1;
- smap_node->grid_ycord = 1;
+ smap_node->grid_xcord = (i % cols) + 1;
+ smap_node->grid_ycord = (i / cols) + 1;
} else if (params.cluster_dims == 2) {
smap_node->grid_xcord = smap_node->coord[0] + 1;
smap_node->grid_ycord =
diff --git a/src/smap/job_functions.c b/src/smap/job_functions.c
index 3a5dd63..234ec0b 100644
--- a/src/smap/job_functions.c
+++ b/src/smap/job_functions.c
@@ -138,7 +138,7 @@
if (!params.commandline) {
if ((count >= text_line_cnt) &&
- (printed_jobs < (text_win->_maxy-3))) {
+ (printed_jobs < (getmaxy(text_win) - 4))) {
job_ptr->num_cpus =
(int)letters[count%62];
wattron(text_win,
@@ -166,7 +166,7 @@
if (!params.commandline) {
if ((count>=text_line_cnt) &&
- (printed_jobs < (text_win->_maxy-3))) {
+ (printed_jobs < (getmaxy(text_win) - 4))) {
xfree(job_ptr->nodes);
job_ptr->nodes = xstrdup("waiting...");
job_ptr->num_cpus = (int) letters[count%62];
@@ -239,7 +239,7 @@
main_xcord += 6;
if (params.cluster_flags & CLUSTER_FLAG_BG)
mvwprintw(text_win, main_ycord,
- main_xcord, "BP_LIST");
+ main_xcord, "MIDPLANELIST");
else
mvwprintw(text_win, main_ycord,
main_xcord, "NODELIST");
@@ -256,7 +256,7 @@
printf(" TIME ");
printf("NODES ");
if (params.cluster_flags & CLUSTER_FLAG_BG)
- printf("BP_LIST\n");
+ printf("MIDPLANELIST\n");
else
printf("NODELIST\n");
}
@@ -387,8 +387,7 @@
return printed;
}
main_xcord++;
- width = text_win->_maxx
- - main_xcord;
+ width = getmaxx(text_win) - 1 - main_xcord;
if (job_ptr->nodes[i] == '[')
prefixlen = i + 1;
else if (job_ptr->nodes[i] == ','
diff --git a/src/smap/opts.c b/src/smap/opts.c
index fb86a4f..9a7d87b 100644
--- a/src/smap/opts.c
+++ b/src/smap/opts.c
@@ -39,6 +39,7 @@
#include "src/smap/smap.h"
#include "src/common/proc_args.h"
+#include "src/common/xstring.h"
/* FUNCTIONS */
static void _help(void);
@@ -201,44 +202,90 @@
extern void clear_window(WINDOW *win)
{
- int x,y;
- for(x=0; x<=win->_maxx; x++)
- for(y=0; y<win->_maxy; y++) {
+ int x, y;
+ for (x = 0; x < getmaxx(win); x++)
+ for (y = 0; y < getmaxy(win); y++) {
mvwaddch(win, y, x, ' ');
}
wmove(win, 1, 1);
wnoutrefresh(win);
}
-extern char *resolve_mp(char *desc)
+extern char *resolve_mp(char *desc, node_info_msg_t *node_info_ptr)
{
char *ret_str = NULL;
#if defined HAVE_BG_FILES
ba_mp_t *ba_mp = NULL;
- int i;
+ int i, start_pos;
+ char *name;
if (!desc) {
ret_str = xstrdup("No Description given.\n");
goto fini;
}
-#ifdef HAVE_BG
- bg_configure_ba_setup_wires();
-#endif
- i = strlen(desc) - params.cluster_dims;
- if (i < 0) {
+ start_pos = strlen(desc) - params.cluster_dims;
+ if (start_pos < 0) {
ret_str = xstrdup_printf("Must enter %d coords to resolve.\n",
params.cluster_dims);
goto fini;
}
+ if (desc[0] != 'R')
+ name = desc+start_pos;
+ else
+ name = desc;
+
+ if (node_info_ptr) {
+ for (i=0; i<node_info_ptr->record_count; i++) {
+ char *rack_mid, *node_geo;
+ node_info_t *node_ptr = &(node_info_ptr->node_array[i]);
+
+ if (!node_ptr->name || (node_ptr->name[0] == '\0'))
+ continue;
+ start_pos = strlen(node_ptr->name)
+ - params.cluster_dims;
+ node_geo = node_ptr->name+start_pos;
+
+ slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
+ SELECT_NODEDATA_RACK_MP,
+ 0, &rack_mid);
+ if (!rack_mid)
+ break;
+ if (desc[0] != 'R') {
+ if (!strcasecmp(name, node_geo))
+ ret_str = xstrdup_printf(
+ "%s resolves to %s\n",
+ node_geo, rack_mid);
+ } else if (!strcasecmp(name, rack_mid))
+ ret_str = xstrdup_printf(
+ "%s resolves to %s\n",
+ rack_mid, node_geo);
+
+ xfree(rack_mid);
+ if (ret_str)
+ return ret_str;
+ }
+ if (desc[0] != 'R')
+ ret_str = xstrdup_printf("%s has no resolve\n", name);
+ else
+ ret_str = xstrdup_printf("%s has no resolve.\n", desc);
+ return ret_str;
+ }
+
+	/* Quiet any errors that could come our way here. */
+ ba_configure_set_ba_debug_flags(0);
+
+ bg_configure_ba_setup_wires();
+
if (desc[0] != 'R') {
- ba_mp = bg_configure_str2ba_mp(desc+i);
+ ba_mp = bg_configure_str2ba_mp(name);
if (ba_mp)
ret_str = xstrdup_printf("%s resolves to %s\n",
ba_mp->coord_str, ba_mp->loc);
else
- ret_str = xstrdup_printf("%s has no resolve\n", desc+i);
+ ret_str = xstrdup_printf("%s has no resolve\n",
+ name);
} else {
ba_mp = bg_configure_loc2ba_mp(desc);
if (ba_mp)
diff --git a/src/smap/partition_functions.c b/src/smap/partition_functions.c
index 604b1ab..b3723a8 100644
--- a/src/smap/partition_functions.c
+++ b/src/smap/partition_functions.c
@@ -49,9 +49,8 @@
char *bg_block_name;
uint16_t bg_conn_type[HIGHEST_DIMENSIONS];
uint16_t bg_node_use;
- char *bg_user_name;
char *ionode_str;
- int job_running;
+ List job_list;
int letter_num;
List nodelist;
char *mp_str;
@@ -66,7 +65,6 @@
static void _block_list_del(void *object);
static int _in_slurm_partition(List slurm_nodes, List bg_nodes);
-static int _list_match_all(void *object, void *key);
static int _make_nodelist(char *nodes, List nodelist);
static void _marknodes(db2_block_info_t *block_ptr, int count);
static void _nodelist_del(void *object);
@@ -128,7 +126,7 @@
else
recs = 0;
if (!params.commandline)
- if ((recs - text_line_cnt) < (text_win->_maxy - 3))
+ if ((recs - text_line_cnt) < (getmaxy(text_win) - 4))
text_line_cnt--;
if (params.hl)
@@ -256,7 +254,7 @@
}
if (block_list) {
/* clear the old list */
- list_delete_all(block_list, _list_match_all, NULL);
+ list_flush(block_list);
} else {
block_list = list_create(_block_list_del);
if (!block_list) {
@@ -266,7 +264,7 @@
}
if (!params.commandline)
if ((new_bg_ptr->record_count - text_line_cnt)
- < (text_win->_maxy-3))
+ < (getmaxy(text_win) - 4))
text_line_cnt--;
if (params.hl)
nodes_req = get_requested_node_bitmap();
@@ -301,8 +299,6 @@
block_ptr->nodelist = list_create(_nodelist_del);
_make_nodelist(block_ptr->mp_str, block_ptr->nodelist);
- block_ptr->bg_user_name
- = xstrdup(new_bg_ptr->block_array[i].owner_name);
block_ptr->state = new_bg_ptr->block_array[i].state;
memcpy(block_ptr->bg_conn_type,
@@ -331,8 +327,21 @@
last_count++;
_marknodes(block_ptr, last_count);
}
- block_ptr->job_running =
- new_bg_ptr->block_array[i].job_running;
+
+ block_ptr->job_list = list_create(slurm_free_block_job_info);
+ if (new_bg_ptr->block_array[i].job_list) {
+ block_job_info_t *found_job;
+ ListIterator itr = list_iterator_create(
+ new_bg_ptr->block_array[i].job_list);
+ while ((found_job = list_next(itr))) {
+ block_job_info_t *block_job =
+ xmalloc(sizeof(block_job_info_t));
+ block_job->job_id = found_job->job_id;
+ list_append(block_ptr->job_list, block_job);
+ }
+ list_iterator_destroy(itr);
+ }
+
if (block_ptr->bg_conn_type[0] >= SELECT_SMALL)
block_ptr->size = 0;
@@ -396,6 +405,32 @@
return;
}
+static char *_set_running_job_str(List job_list, bool compact)
+{
+ int cnt = list_count(job_list);
+ block_job_info_t *block_job;
+
+ if (!cnt) {
+ return xstrdup("-");
+ } else if (cnt == 1) {
+ block_job = list_peek(job_list);
+ return xstrdup_printf("%u", block_job->job_id);
+ } else if (compact)
+ return xstrdup("multiple");
+ else {
+ char *tmp_char = NULL;
+ ListIterator itr = list_iterator_create(job_list);
+ while ((block_job = list_next(itr))) {
+ if (tmp_char)
+ xstrcat(tmp_char, " ");
+ xstrfmtcat(tmp_char, "%u", block_job->job_id);
+ }
+ return tmp_char;
+ }
+
+ return NULL;
+}
+
static void _marknodes(db2_block_info_t *block_ptr, int count)
{
int i, j = 0;
@@ -464,17 +499,13 @@
mvwprintw(text_win,
main_ycord,
main_xcord, "STATE");
- main_xcord += 7;
+ main_xcord += 8;
mvwprintw(text_win,
main_ycord,
main_xcord, "JOBID");
main_xcord += 8;
mvwprintw(text_win,
main_ycord,
- main_xcord, "USER");
- main_xcord += 9;
- mvwprintw(text_win,
- main_ycord,
main_xcord, "CONN");
main_xcord += 8;
if (params.cluster_flags & CLUSTER_FLAG_BGL) {
@@ -490,7 +521,7 @@
main_xcord += 7;
if (params.cluster_flags & CLUSTER_FLAG_BG)
mvwprintw(text_win, main_ycord,
- main_xcord, "BP_LIST");
+ main_xcord, "MIDPLANELIST");
else
mvwprintw(text_win, main_ycord,
main_xcord, "NODELIST");
@@ -504,16 +535,15 @@
} else {
printf(" BG_BLOCK ");
printf("STATE ");
- printf(" JOBID ");
- printf(" USER ");
- printf(" CONN ");
+ printf(" JOBID ");
+ printf(" CONN ");
if (params.cluster_flags & CLUSTER_FLAG_BGL)
printf(" NODE_USE ");
}
printf("NODES ");
if (params.cluster_flags & CLUSTER_FLAG_BG)
- printf("BP_LIST\n");
+ printf("MIDPLANELIST\n");
else
printf("NODELIST\n");
}
@@ -589,6 +619,8 @@
if (params.display == BGPART) {
if (db2_info_ptr) {
+ char *job_running = _set_running_job_str(
+ db2_info_ptr->job_list, 1);
mvwprintw(text_win,
main_ycord,
main_xcord, "%.16s",
@@ -596,18 +628,14 @@
main_xcord += 18;
mvwprintw(text_win,
main_ycord,
- main_xcord,
+ main_xcord, "%.7s",
bg_block_state_string(
db2_info_ptr->state));
- main_xcord += 7;
+ main_xcord += 8;
- if (db2_info_ptr->job_running > NO_JOB_RUNNING)
- snprintf(tmp_char, sizeof(tmp_char),
- "%d",
- db2_info_ptr->job_running);
- else
- snprintf(tmp_char, sizeof(tmp_char),
- "-");
+ snprintf(tmp_char, sizeof(tmp_char),
+ "%s", job_running);
+ xfree(job_running);
mvwprintw(text_win,
main_ycord,
@@ -615,12 +643,6 @@
"%.8s", tmp_char);
main_xcord += 8;
- mvwprintw(text_win,
- main_ycord,
- main_xcord, "%.8s",
- db2_info_ptr->bg_user_name);
- main_xcord += 9;
-
conn_str = conn_type_string_full(
db2_info_ptr->bg_conn_type);
mvwprintw(text_win,
@@ -647,7 +669,7 @@
mvwprintw(text_win,
main_ycord,
main_xcord, "?");
- main_xcord += 7;
+ main_xcord += 8;
mvwprintw(text_win,
main_ycord,
main_xcord, "?");
@@ -681,8 +703,7 @@
i = 0;
prefixlen = i;
while (nodes && nodes[i]) {
- width = text_win->_maxx
- - main_xcord;
+ width = getmaxx(text_win) - 1 - main_xcord;
if (!prefixlen && (nodes[i] == '[') &&
(nodes[i - 1] == ','))
@@ -691,13 +712,11 @@
if (nodes[i - 1] == ',' && (width - 12) <= 0) {
main_ycord++;
main_xcord = tempxcord + prefixlen;
- } else if (main_xcord >
- text_win->_maxx) {
+ } else if (main_xcord >= getmaxx(text_win)) {
main_ycord++;
main_xcord = tempxcord + prefixlen;
}
-
if ((printed = mvwaddch(text_win,
main_ycord,
main_xcord,
@@ -750,22 +769,16 @@
if (params.display == BGPART) {
if (db2_info_ptr) {
+ char *job_running = _set_running_job_str(
+ db2_info_ptr->job_list, 1);
printf("%16.16s ",
db2_info_ptr->bg_block_name);
- printf("%5.5s ",
+ printf("%-7.7s ",
bg_block_state_string(
db2_info_ptr->state));
- if (db2_info_ptr->job_running > NO_JOB_RUNNING)
- snprintf(tmp_char, sizeof(tmp_char),
- "%d",
- db2_info_ptr->job_running);
- else
- snprintf(tmp_char, sizeof(tmp_char),
- "-");
-
- printf("%8.8s ", tmp_char);
- printf("%8.8s ", db2_info_ptr->bg_user_name);
+ printf("%8.8s ", job_running);
+ xfree(job_running);
conn_str = conn_type_string_full(
db2_info_ptr->bg_conn_type);
@@ -800,14 +813,16 @@
db2_block_info_t *block_ptr = (db2_block_info_t *)object;
if (block_ptr) {
- xfree(block_ptr->bg_user_name);
xfree(block_ptr->bg_block_name);
xfree(block_ptr->slurm_part_name);
xfree(block_ptr->mp_str);
xfree(block_ptr->ionode_str);
if (block_ptr->nodelist)
list_destroy(block_ptr->nodelist);
-
+ if (block_ptr->job_list) {
+ list_destroy(block_ptr->job_list);
+ block_ptr->job_list = NULL;
+ }
xfree(block_ptr);
}
@@ -820,11 +835,6 @@
return;
}
-static int _list_match_all(void *object, void *key)
-{
- return 1;
-}
-
static int _in_slurm_partition(List slurm_nodes, List bg_nodes)
{
ListIterator slurm_itr;
diff --git a/src/smap/reservation_functions.c b/src/smap/reservation_functions.c
index 2831ef7..72a5f89 100644
--- a/src/smap/reservation_functions.c
+++ b/src/smap/reservation_functions.c
@@ -132,7 +132,7 @@
if (resv.node_inx[0] != -1) {
if (!params.commandline) {
if ((count >= text_line_cnt) &&
- (printed_resv < (text_win->_maxy-3))) {
+ (printed_resv < (getmaxy(text_win) - 4))){
resv.flags = (int)letters[count%62];
wattron(text_win,
COLOR_PAIR(colors[count%6]));
@@ -184,8 +184,12 @@
main_xcord, "%30.30s ",
"ACCESS_CONTROL(Accounts,Users)");
main_xcord += 32;
- mvwprintw(text_win, main_ycord,
- main_xcord, "%s", "NODELIST");
+ if (params.cluster_flags & CLUSTER_FLAG_BG)
+ mvwprintw(text_win, main_ycord,
+ main_xcord, "MIDPLANELIST");
+ else
+ mvwprintw(text_win, main_ycord,
+ main_xcord, "%s", "NODELIST");
main_xcord = 1;
main_ycord++;
} else {
@@ -194,7 +198,10 @@
printf("%19.19s ", "END_TIME");
printf("%5.5s ", "NODES");
printf("%30.30s ", "ACCESS_CONTROL(Accounts,Users)");
- printf("%s", "NODELIST\n");
+ if (params.cluster_flags & CLUSTER_FLAG_BG)
+ printf("MIDPLANELIST\n");
+ else
+ printf("NODELIST\n");
}
}
diff --git a/src/smap/smap.c b/src/smap/smap.c
index f5a05ff..03dd4e7 100644
--- a/src/smap/smap.c
+++ b/src/smap/smap.c
@@ -105,20 +105,18 @@
min_screen_width = 92;
/* no need for this if you are resolving */
- if (!params.resolve) {
- while (slurm_load_node((time_t) NULL,
- &new_node_ptr, SHOW_ALL)) {
- error_code = slurm_get_errno();
- printf("slurm_load_node: %s\n",
- slurm_strerror(error_code));
- if (params.display == COMMANDS) {
- new_node_ptr = NULL;
- break; /* just continue */
- }
- if (params.iterate == 0)
- exit(1);
- sleep(10); /* keep trying to reconnect */
+ while (slurm_load_node((time_t) NULL,
+ &new_node_ptr, SHOW_ALL)) {
+ if (params.resolve || (params.display == COMMANDS)) {
+ new_node_ptr = NULL;
+ break; /* just continue */
}
+ error_code = slurm_get_errno();
+ printf("slurm_load_node: %s\n",
+ slurm_strerror(error_code));
+ if (params.iterate == 0)
+ exit(1);
+ sleep(10); /* keep trying to reconnect */
}
select_g_ba_init(new_node_ptr, 0);
@@ -131,7 +129,7 @@
_init_colors();
if (params.resolve) {
- char *ret_str = resolve_mp(params.resolve);
+ char *ret_str = resolve_mp(params.resolve, new_node_ptr);
if (ret_str) {
printf("%s", ret_str);
xfree(ret_str);
@@ -141,10 +139,9 @@
if (!params.commandline) {
int check_width = min_screen_width;
- init_grid(new_node_ptr);
-
- signal(SIGWINCH, (void (*)(int))_resize_handler);
initscr();
+ init_grid(new_node_ptr, COLS);
+ signal(SIGWINCH, (void (*)(int))_resize_handler);
if (params.cluster_dims == 4) {
height = dim_size[2] * dim_size[3] + dim_size[2] + 3;
@@ -182,7 +179,7 @@
_set_pairs();
grid_win = newwin(height, width, starty, startx);
- max_display = grid_win->_maxy * grid_win->_maxx;
+ max_display = (getmaxy(grid_win) - 1) * (getmaxx(grid_win) - 1);
if (params.cluster_dims == 4) {
startx = width;
@@ -445,7 +442,7 @@
case KEY_DOWN:
if (!(params.cluster_flags & CLUSTER_FLAG_BG)) {
grid_line_cnt++;
- if ((((grid_line_cnt-2) * (grid_win->_maxx-1)) +
+ if ((((grid_line_cnt - 2) * (getmaxx(grid_win) - 2)) +
max_display) > dim_size[0]) {
grid_line_cnt--;
return 0;
@@ -505,7 +502,7 @@
}
grid_win = newwin(height, width, starty, startx);
- max_display = grid_win->_maxy * grid_win->_maxx;
+ max_display = (getmaxy(grid_win) - 1) * (getmaxx(grid_win) - 1);
if (params.cluster_dims == 4) {
startx = width;
diff --git a/src/smap/smap.h b/src/smap/smap.h
index 8ead4d6..e887f10 100644
--- a/src/smap/smap.h
+++ b/src/smap/smap.h
@@ -182,7 +182,7 @@
extern smap_system_t *smap_system_ptr;
extern int quiet_flag;
-extern void init_grid(node_info_msg_t *node_info_ptr);
+extern void init_grid(node_info_msg_t *node_info_ptr, int cols);
extern void update_grid(node_info_msg_t *node_info_ptr);
extern void clear_grid(void);
extern void free_grid(void);
@@ -195,7 +195,7 @@
extern void parse_command_line(int argc, char *argv[]);
extern void print_date(void);
extern void clear_window(WINDOW *win);
-extern char *resolve_mp(char *desc);
+extern char *resolve_mp(char *desc, node_info_msg_t *node_info_ptr);
extern void get_slurm_part(void);
extern void get_bg_part(void);
diff --git a/src/sprio/Makefile.in b/src/sprio/Makefile.in
index b56063f..3e1e333 100644
--- a/src/sprio/Makefile.in
+++ b/src/sprio/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -194,6 +194,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -230,6 +231,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -400,7 +402,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sprio$(EXEEXT): $(sprio_OBJECTS) $(sprio_DEPENDENCIES)
+sprio$(EXEEXT): $(sprio_OBJECTS) $(sprio_DEPENDENCIES) $(EXTRA_sprio_DEPENDENCIES)
@rm -f sprio$(EXEEXT)
$(sprio_LINK) $(sprio_OBJECTS) $(sprio_LDADD) $(LIBS)
@@ -540,10 +542,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/squeue/Makefile.in b/src/squeue/Makefile.in
index 0a1ab84..c12394a 100644
--- a/src/squeue/Makefile.in
+++ b/src/squeue/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -195,6 +195,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -231,6 +232,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -401,7 +403,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-squeue$(EXEEXT): $(squeue_OBJECTS) $(squeue_DEPENDENCIES)
+squeue$(EXEEXT): $(squeue_OBJECTS) $(squeue_DEPENDENCIES) $(EXTRA_squeue_DEPENDENCIES)
@rm -f squeue$(EXEEXT)
$(squeue_LINK) $(squeue_OBJECTS) $(squeue_LDADD) $(LIBS)
@@ -542,10 +544,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/squeue/opts.c b/src/squeue/opts.c
index 2cf9326..b4351a7 100644
--- a/src/squeue/opts.c
+++ b/src/squeue/opts.c
@@ -107,8 +107,10 @@
{"long", no_argument, 0, 'l'},
{"cluster", required_argument, 0, 'M'},
{"clusters", required_argument, 0, 'M'},
- {"node", required_argument, 0, 'n'},
- {"nodes", required_argument, 0, 'n'},
+ {"name", required_argument, 0, 'n'},
+ {"node", required_argument, 0, 'w'},
+ {"nodes", required_argument, 0, 'w'},
+ {"nodelist", required_argument, 0, 'w'},
{"noheader", no_argument, 0, 'h'},
{"partitions", required_argument, 0, 'p'},
{"qos", required_argument, 0, 'q'},
@@ -142,7 +144,7 @@
}
while ((opt_char = getopt_long(argc, argv,
- "A:ahi:j::ln:M:o:p:q:R:s::S:t:u:U:vV",
+ "A:ahi:j::ln:M:o:p:q:R:s::S:t:u:U:vVw:",
long_options, &option_index)) != -1) {
switch (opt_char) {
case (int)'?':
@@ -196,15 +198,9 @@
working_cluster_rec = list_peek(params.clusters);
break;
case (int) 'n':
- if (params.nodes)
- hostset_destroy(params.nodes);
-
- params.nodes = hostset_create(optarg);
- if (params.nodes == NULL) {
- error("'%s' invalid entry for --nodes",
- optarg);
- exit(1);
- }
+ xfree(params.names);
+ params.names = xstrdup(optarg);
+ params.name_list = _build_str_list( params.names );
break;
case (int) 'o':
xfree(params.format);
@@ -260,6 +256,17 @@
case (int) 'V':
print_slurm_version();
exit(0);
+ case (int) 'w':
+ if (params.nodes)
+ hostset_destroy(params.nodes);
+
+ params.nodes = hostset_create(optarg);
+ if (params.nodes == NULL) {
+ error("'%s' invalid entry for --nodelist",
+ optarg);
+ exit(1);
+ }
+ break;
case OPT_LONG_HELP:
_help();
exit(0);
@@ -344,6 +351,12 @@
params.account_list = _build_str_list( params.accounts );
}
+ if ( ( params.names == NULL ) &&
+ ( env_val = getenv("SQUEUE_NAMES") ) ) {
+ params.names = xstrdup(env_val);
+ params.name_list = _build_str_list( params.names );
+ }
+
if ( ( params.partitions == NULL ) &&
( env_val = getenv("SQUEUE_PARTITION") ) ) {
params.partitions = xstrdup(env_val);
@@ -843,7 +856,7 @@
{
ListIterator iterator;
int i;
- char *part;
+ char *part, *name;
uint32_t *user;
enum job_states *state_id;
squeue_job_step_t *job_step_id;
@@ -863,6 +876,7 @@
printf( "job_flag = %d\n", params.job_flag );
printf( "jobs = %s\n", params.jobs );
printf( "max_cpus = %d\n", params.max_cpus ) ;
+ printf( "names = %s\n", params.names );
printf( "nodes = %s\n", hostlist ) ;
printf( "partitions = %s\n", params.partitions ) ;
printf( "reservation = %s\n", params.reservation ) ;
@@ -883,6 +897,16 @@
list_iterator_destroy( iterator );
}
+
+ if ((params.verbose > 1) && params.name_list) {
+ i = 0;
+ iterator = list_iterator_create( params.name_list );
+ while ( (name = list_next( iterator )) ) {
+ printf( "name_list[%d] = %u\n", i++, *name);
+ }
+ list_iterator_destroy( iterator );
+ }
+
if ((params.verbose > 1) && params.part_list) {
i = 0;
iterator = list_iterator_create( params.part_list );
@@ -958,7 +982,8 @@
}
/*
- * _build_str_list- build a list of strings
+ * _build_str_list - convert a string of comma-separated elements
+ * into a list of strings
* IN str - comma separated list of strings
* RET List of strings
*/
@@ -973,11 +998,12 @@
my_list = list_create( NULL );
my_part_list = xstrdup( str );
part = strtok_r( my_part_list, ",", &tmp_char );
- while (part)
- {
+ while (part) {
list_append( my_list, part );
part = strtok_r( NULL, ",", &tmp_char );
}
+ /* NOTE: Do NOT xfree my_part_list or the elements just added to the
+ * list will also be freed. */
return my_list;
}
@@ -1119,9 +1145,9 @@
static void _usage(void)
{
printf("\
-Usage: squeue [-i seconds] [-S fields] [--start] [-t states]\n\
- [-p partitions] [-n node] [-o format] [-u user_name]\n\
- [-R reservation] [--usage] [-ahjlsv]\n");
+Usage: squeue [-i seconds] [-n name] [-o format] [-p partitions]\n\
+ [-R reservation] [-S fields] [--start] [-t states]\n\
+ [-u user_name] [--usage] [-w nodes] [-ahjlsv]\n");
}
static void _help(void)
@@ -1140,8 +1166,7 @@
-M, --clusters=cluster_name cluster to issue commands to. Default is\n\
current cluster. cluster with no name will\n\
reset to default.\n\
- -n, --nodes=hostlist list of nodes to view, default is \n\
- all nodes\n\
+ -n, --name=job_name(s) comma separated list of job names to view\n\
-o, --format=format format specification\n\
-p, --partition=partition(s) comma separated list of partitions\n\
to view, default is all partitions\n\
@@ -1156,8 +1181,11 @@
default is pending and running,\n\
'--states=all' reports all states\n\
-u, --user=user_name(s) comma separated list of users to view\n\
+ --name=job_name(s) comma separated list of job names to view\n\
-v, --verbose verbosity level\n\
-V, --version output version information and exit\n\
+ -w, --nodelist=hostlist list of nodes to view, default is \n\
+ all nodes\n\
\nHelp options:\n\
--help show this help message\n\
--usage display a brief summary of squeue options\n");
diff --git a/src/squeue/print.c b/src/squeue/print.c
index aa6f6d1..ce123d8 100644
--- a/src/squeue/print.c
+++ b/src/squeue/print.c
@@ -614,7 +614,7 @@
if (job == NULL) { /* Print the Header instead */
char *title = "NODELIST";
if(params.cluster_flags & CLUSTER_FLAG_BG)
- title = "BP_LIST";
+ title = "MIDPLANELIST";
_print_str(title, width, right, false);
} else {
char *nodes = xstrdup(job->nodes);
@@ -645,7 +645,7 @@
if (job == NULL) { /* Print the Header instead */
char *title = "NODELIST(REASON)";
if(params.cluster_flags & CLUSTER_FLAG_BG)
- title = "BP_LIST(REASON)";
+ title = "MIDPLANELIST(REASON)";
_print_str(title, width, right, false);
} else if (!IS_JOB_COMPLETING(job)
&& (IS_JOB_PENDING(job)
@@ -751,10 +751,18 @@
static int _get_node_cnt(job_info_t * job)
{
int node_cnt = 0, round;
- bool completing = job->job_state & JOB_COMPLETING;
- uint16_t base_job_state = job->job_state & (~JOB_COMPLETING);
- if (base_job_state == JOB_PENDING || completing) {
+ /* For PENDING jobs, return the maximum of the requested nodelist,
+ * requested maximum number of nodes, or requested CPUs rounded
+ * to nearest node.
+ *
+ * For COMPLETING jobs, the job->nodes nodelist has already been
+ * altered to list only the nodes still in the comp state, and
+ * thus we count only those nodes toward the total nodes still
+ * allocated to this job.
+ */
+
+ if (IS_JOB_PENDING(job)) {
node_cnt = _nodes_in_list(job->req_nodes);
node_cnt = MAX(node_cnt, job->num_nodes);
round = job->num_cpus + params.max_cpus - 1;
@@ -1309,7 +1317,7 @@
if (step == NULL) { /* Print the Header instead */
char *title = "NODELIST";
if(params.cluster_flags & CLUSTER_FLAG_BG)
- title = "BP_LIST";
+ title = "MIDPLANELIST";
_print_str(title, width, right, false);
} else {
@@ -1367,7 +1375,7 @@
ListIterator iterator;
uint32_t *job_id, *user;
uint16_t *state_id;
- char *account, *part, *qos;
+ char *account, *part, *qos, *name;
if (params.job_list) {
filter = 1;
@@ -1487,6 +1495,21 @@
}
}
+ if (params.name_list) {
+ filter = 1;
+ iterator = list_iterator_create(params.name_list);
+ while ((name = list_next(iterator))) {
+ if ((job->name != NULL) &&
+ (strcasecmp(name, job->name) == 0)) {
+ filter = 0;
+ break;
+ }
+ }
+ list_iterator_destroy(iterator);
+ if (filter == 1)
+ return 8;
+ }
+
return 0;
}
diff --git a/src/squeue/squeue.h b/src/squeue/squeue.h
index 0ccf996..1df5449 100644
--- a/src/squeue/squeue.h
+++ b/src/squeue/squeue.h
@@ -92,6 +92,7 @@
uint32_t cluster_flags;
char* format;
char* jobs;
+ char* names;
hostset_t nodes;
char* partitions;
char* qoss;
@@ -104,6 +105,7 @@
List account_list;
List format_list;
List job_list;
+ List name_list;
List part_list;
List qos_list;
List state_list;
diff --git a/src/sreport/Makefile.in b/src/sreport/Makefile.in
index 1143fd2..f6fe57f 100644
--- a/src/sreport/Makefile.in
+++ b/src/sreport/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -192,6 +192,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -228,6 +229,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -409,7 +411,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sreport$(EXEEXT): $(sreport_OBJECTS) $(sreport_DEPENDENCIES)
+sreport$(EXEEXT): $(sreport_OBJECTS) $(sreport_DEPENDENCIES) $(EXTRA_sreport_DEPENDENCIES)
@rm -f sreport$(EXEEXT)
$(sreport_LINK) $(sreport_OBJECTS) $(sreport_LDADD) $(LIBS)
@@ -553,10 +555,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/srun/Makefile.am b/src/srun/Makefile.am
index 83da008..ee4f6ee 100644
--- a/src/srun/Makefile.am
+++ b/src/srun/Makefile.am
@@ -22,14 +22,22 @@
task_state.c task_state.h \
srun.wrapper.c
+
+# pthread is needed for compiling with g++ (which is used for linking
+# no matter if on a BGQ or not because of mentioning a .cc file
convenience_libs = \
- $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
+ $(top_builddir)/src/api/libslurm.o -lpthread $(DL_LIBS)
srun_LDADD = \
$(convenience_libs)
srun_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
+if BGQ_LOADED
+srun_SOURCES += runjob_interface.cc
+srun_LDFLAGS += $(RUNJOB_LDFLAGS)
+endif
+
force:
$(convenience_libs) : force
@cd `dirname $@` && $(MAKE) `basename $@`
diff --git a/src/srun/Makefile.in b/src/srun/Makefile.in
index 9a4533e..a83fa5a 100644
--- a/src/srun/Makefile.in
+++ b/src/srun/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -38,6 +38,8 @@
host_triplet = @host@
target_triplet = @target@
bin_PROGRAMS = srun$(EXEEXT)
+@BGQ_LOADED_TRUE@am__append_1 = runjob_interface.cc
+@BGQ_LOADED_TRUE@am__append_2 = $(RUNJOB_LDFLAGS)
subdir = src/srun
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -87,18 +89,23 @@
CONFIG_CLEAN_VPATH_FILES =
am__installdirs = "$(DESTDIR)$(bindir)"
PROGRAMS = $(bin_PROGRAMS)
+am__srun_SOURCES_DIST = srun.c srun.h opt.c opt.h srun_job.c \
+ srun_job.h srun_pty.c srun_pty.h debugger.h debugger.c fname.c \
+ fname.h allocate.c allocate.h multi_prog.c multi_prog.h \
+ task_state.c task_state.h srun.wrapper.c runjob_interface.cc
+@BGQ_LOADED_TRUE@am__objects_1 = runjob_interface.$(OBJEXT)
am_srun_OBJECTS = srun.$(OBJEXT) opt.$(OBJEXT) srun_job.$(OBJEXT) \
srun_pty.$(OBJEXT) debugger.$(OBJEXT) fname.$(OBJEXT) \
allocate.$(OBJEXT) multi_prog.$(OBJEXT) task_state.$(OBJEXT) \
- srun.wrapper.$(OBJEXT)
+ srun.wrapper.$(OBJEXT) $(am__objects_1)
srun_OBJECTS = $(am_srun_OBJECTS)
am__DEPENDENCIES_1 =
am__DEPENDENCIES_2 = $(top_builddir)/src/api/libslurm.o \
$(am__DEPENDENCIES_1)
srun_DEPENDENCIES = $(am__DEPENDENCIES_2)
-srun_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(srun_LDFLAGS) \
- $(LDFLAGS) -o $@
+srun_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(srun_LDFLAGS) $(LDFLAGS) -o $@
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm
depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp
am__depfiles_maybe = depfiles
@@ -112,8 +119,17 @@
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
SOURCES = $(srun_SOURCES)
-DIST_SOURCES = $(srun_SOURCES)
+DIST_SOURCES = $(am__srun_SOURCES_DIST)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -194,6 +210,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -230,6 +247,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -320,32 +338,24 @@
AUTOMAKE_OPTIONS = foreign
CLEANFILES = core.*
INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
-srun_SOURCES = \
- srun.c srun.h \
- opt.c opt.h \
- srun_job.c srun_job.h \
- srun_pty.c srun_pty.h \
- debugger.h \
- debugger.c \
- fname.c \
- fname.h \
- allocate.c \
- allocate.h \
- multi_prog.c multi_prog.h \
- task_state.c task_state.h \
- srun.wrapper.c
+srun_SOURCES = srun.c srun.h opt.c opt.h srun_job.c srun_job.h \
+ srun_pty.c srun_pty.h debugger.h debugger.c fname.c fname.h \
+ allocate.c allocate.h multi_prog.c multi_prog.h task_state.c \
+ task_state.h srun.wrapper.c $(am__append_1)
+# pthread is needed for compiling with g++ (which is used for linking
+# no matter if on a BGQ or not because of mentioning a .cc file
convenience_libs = \
- $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
+ $(top_builddir)/src/api/libslurm.o -lpthread $(DL_LIBS)
srun_LDADD = \
$(convenience_libs)
-srun_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
+srun_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) $(am__append_2)
all: all-am
.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
+.SUFFIXES: .c .cc .lo .o .obj
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
@@ -419,7 +429,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-srun$(EXEEXT): $(srun_OBJECTS) $(srun_DEPENDENCIES)
+srun$(EXEEXT): $(srun_OBJECTS) $(srun_DEPENDENCIES) $(EXTRA_srun_DEPENDENCIES)
@rm -f srun$(EXEEXT)
$(srun_LINK) $(srun_OBJECTS) $(srun_LDADD) $(LIBS)
@@ -434,6 +444,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fname.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_prog.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opt.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/runjob_interface.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srun.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srun.wrapper.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srun_job.Po@am__quote@
@@ -461,6 +472,27 @@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
mostlyclean-libtool:
-rm -f *.lo
@@ -566,10 +598,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 2f65c7b..ba1c4a5 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -110,6 +110,10 @@
static void _signal_while_allocating(int signo)
{
+ debug("Got signal %d", signo);
+ if (signo == SIGCONT)
+ return;
+
destroy_job = 1;
if (pending_job_id != 0) {
slurm_complete_job(pending_job_id, NO_VAL);
@@ -317,7 +321,7 @@
pending_job_id = alloc->job_id;
- for (i=0; (cur_delay < max_delay); i++) {
+ for (i = 0; (cur_delay < max_delay); i++) {
if (i) {
if (i == 1)
verbose("Waiting for nodes to boot");
@@ -343,8 +347,18 @@
break;
}
if (is_ready) {
+ resource_allocation_response_msg_t *resp;
+ char *tmp_str;
if (i > 0)
verbose("Nodes %s are ready for job", alloc->node_list);
+ if (alloc->alias_list && !strcmp(alloc->alias_list, "TBD") &&
+ (slurm_allocation_lookup_lite(pending_job_id, &resp)
+ == SLURM_SUCCESS)) {
+ tmp_str = alloc->alias_list;
+ alloc->alias_list = resp->alias_list;
+ resp->alias_list = tmp_str;
+ slurm_free_resource_allocation_response_msg(resp);
+ }
} else if (!destroy_job)
error("Nodes %s are still not ready", alloc->node_list);
else /* allocation_interrupted and slurmctld not responing */
@@ -430,7 +444,7 @@
}
#else
if (!_wait_nodes_ready(resp)) {
- if(!destroy_job)
+ if (!destroy_job)
error("Something is wrong with the "
"boot of the nodes.");
goto relinquish;
@@ -462,7 +476,7 @@
}
int
-cleanup_allocation()
+cleanup_allocation(void)
{
slurm_allocation_msg_thr_destroy(msg_thr);
return SLURM_SUCCESS;
@@ -536,7 +550,6 @@
{
job_desc_msg_t *j = xmalloc(sizeof(*j));
hostlist_t hl = NULL;
- int i;
slurm_init_job_desc_msg(j);
@@ -546,9 +559,9 @@
if (opt.immediate == 1)
j->immediate = opt.immediate;
if (opt.job_name)
- j->name = xstrdup(opt.job_name);
+ j->name = opt.job_name;
else
- j->name = xstrdup(opt.cmd_name);
+ j->name = opt.cmd_name;
if (opt.argc > 0) {
j->argc = 1;
j->argv = (char **) xmalloc(sizeof(char *) * 2);
@@ -556,8 +569,8 @@
}
if (opt.acctg_freq >= 0)
j->acctg_freq = opt.acctg_freq;
- j->reservation = xstrdup(opt.reservation);
- j->wckey = xstrdup(opt.wckey);
+ j->reservation = opt.reservation;
+ j->wckey = opt.wckey;
j->req_nodes = xstrdup(opt.nodelist);
@@ -617,21 +630,21 @@
j->ntasks_per_core = opt.ntasks_per_core;
if (opt.mail_user)
- j->mail_user = xstrdup(opt.mail_user);
+ j->mail_user = opt.mail_user;
if (opt.begin)
j->begin_time = opt.begin;
if (opt.licenses)
- j->licenses = xstrdup(opt.licenses);
+ j->licenses = opt.licenses;
if (opt.network)
- j->network = xstrdup(opt.network);
+ j->network = opt.network;
if (opt.account)
- j->account = xstrdup(opt.account);
+ j->account = opt.account;
if (opt.comment)
- j->comment = xstrdup(opt.comment);
+ j->comment = opt.comment;
if (opt.qos)
- j->qos = xstrdup(opt.qos);
+ j->qos = opt.qos;
if (opt.cwd)
- j->work_dir = xstrdup(opt.cwd);
+ j->work_dir = opt.cwd;
if (opt.hold)
j->priority = 0;
@@ -639,29 +652,27 @@
j->job_id = opt.jobid;
#ifdef HAVE_BG
if (opt.geometry[0] > 0) {
- for (i=0; i<SYSTEM_DIMENSIONS; i++)
+ int i;
+ for (i = 0; i < SYSTEM_DIMENSIONS; i++)
j->geometry[i] = opt.geometry[i];
}
#endif
- for (i=0; i<HIGHEST_DIMENSIONS; i++) {
- if (opt.conn_type[i] == (uint16_t)NO_VAL)
- break;
- j->conn_type[i] = opt.conn_type[i];
- }
+ memcpy(j->conn_type, opt.conn_type, sizeof(j->conn_type));
+
if (opt.reboot)
j->reboot = 1;
if (opt.no_rotate)
j->rotate = 0;
if (opt.blrtsimage)
- j->blrtsimage = xstrdup(opt.blrtsimage);
+ j->blrtsimage = opt.blrtsimage;
if (opt.linuximage)
- j->linuximage = xstrdup(opt.linuximage);
+ j->linuximage = opt.linuximage;
if (opt.mloaderimage)
- j->mloaderimage = xstrdup(opt.mloaderimage);
+ j->mloaderimage = opt.mloaderimage;
if (opt.ramdiskimage)
- j->ramdiskimage = xstrdup(opt.ramdiskimage);
+ j->ramdiskimage = opt.ramdiskimage;
if (opt.max_nodes)
j->max_nodes = opt.max_nodes;
@@ -727,9 +738,7 @@
job_desc_msg_destroy(job_desc_msg_t *j)
{
if (j) {
- xfree(j->account);
- xfree(j->comment);
- xfree(j->qos);
+ xfree(j->req_nodes);
xfree(j);
}
}
@@ -745,10 +754,13 @@
job->ctx_params.job_id = job->jobid;
job->ctx_params.uid = opt.uid;
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ /* On a Q and onward this we don't add this. */
+#else
/* set the jobid for totalview */
totalview_jobid = NULL;
xstrfmtcat(totalview_jobid, "%u", job->ctx_params.job_id);
-
+#endif
/* Validate minimum and maximum node counts */
if (opt.min_nodes && opt.max_nodes &&
(opt.min_nodes > opt.max_nodes)) {
@@ -835,6 +847,7 @@
job->ctx_params.name = opt.job_name;
else
job->ctx_params.name = opt.cmd_name;
+ job->ctx_params.features = opt.constraints;
debug("requesting job %u, user %u, nodes %u including (%s)",
job->ctx_params.job_id, job->ctx_params.uid,
diff --git a/src/srun/debugger.c b/src/srun/debugger.c
index 81ec9d9..e85675a 100644
--- a/src/srun/debugger.c
+++ b/src/srun/debugger.c
@@ -42,6 +42,12 @@
# include "config.h"
#endif
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+/* Use symbols from the runjob.so library provided by IBM.
+ * Do NOT use debugger symbols local to the srun command */
+
+#else
+
#include "src/common/log.h"
#include "src/srun/debugger.h"
@@ -70,5 +76,4 @@
debug("In MPIR_Breakpoint");
slurm_step_launch_fwd_signal(job->step_ctx, SIG_DEBUG_WAKE);
}
-
-
+#endif
diff --git a/src/srun/debugger.h b/src/srun/debugger.h
index b6eebd1..53b57ec 100644
--- a/src/srun/debugger.h
+++ b/src/srun/debugger.h
@@ -7,8 +7,15 @@
* http://www.etnus.com/
\*****************************************************************************/
-/* $Id: debugger.h 11149 2007-03-14 20:53:19Z morrone $
- */
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+/* Use symbols from the runjob.so library provided by IBM.
+ * Do NOT use debugger symbols local to the srun command */
+
+#else
/* This file contains support for bringing processes up stopped, so that
* a debugger can attach to them (done for TotalView)
@@ -30,10 +37,6 @@
#endif
#endif
-#if HAVE_CONFIG_H
-# include "config.h"
-#endif
-
#include "src/srun/srun_job.h"
/*****************************************************************************
* DEBUGGING SUPPORT *
@@ -97,3 +100,4 @@
extern char *totalview_jobid;
#endif
+#endif
diff --git a/src/srun/fname.c b/src/srun/fname.c
index a254049..d1f7e7a 100644
--- a/src/srun/fname.c
+++ b/src/srun/fname.c
@@ -88,7 +88,7 @@
return fname;
}
- if (strncasecmp(format, "none", (size_t) 4) == 0) {
+ if (strcasecmp(format, "none") == 0) {
/*
* Set type to IO_PER_TASK so that /dev/null is opened
* on every node, which should be more efficient
diff --git a/src/srun/multi_prog.c b/src/srun/multi_prog.c
index 4c2035d..a61a3e7 100644
--- a/src/srun/multi_prog.c
+++ b/src/srun/multi_prog.c
@@ -85,7 +85,7 @@
}
/* check if already absolute path */
- if (file_name[0] == '/')
+ if ((file_name[0] == '/') || (file_name[0] == '.'))
return file_name;
/* search for the file using PATH environment variable */
@@ -113,9 +113,13 @@
static void
_set_range(int low_num, int high_num, char *exec_name, bool ignore_duplicates)
{
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ /* Use symbols from the runjob.so library provided by IBM.
+ * Do NOT use debugger symbols local to the srun command */
+#else
int i;
- for (i=low_num; i<=high_num; i++) {
+ for (i = low_num; i <= high_num; i++) {
MPIR_PROCDESC *tv;
tv = &MPIR_proctable[i];
if (tv->executable_name == NULL) {
@@ -125,6 +129,7 @@
i);
}
}
+#endif
}
static void
@@ -178,13 +183,19 @@
FILE *config_fd;
char line[256];
char *ranks, *exec_name, *p, *ptrptr;
- int line_num = 0, i;
+ int line_num = 0;
- for (i=0; i<ntasks; i++) {
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ /* Use symbols from the runjob.so library provided by IBM.
+ * Do NOT use debugger symbols local to the srun command */
+#else
+ int i;
+ for (i = 0; i < ntasks; i++) {
MPIR_PROCDESC *tv;
tv = &MPIR_proctable[i];
tv->executable_name = NULL;
}
+#endif
config_fd = fopen(config_fname, "r");
if (config_fd == NULL) {
@@ -226,17 +237,26 @@
extern void
mpir_init(int num_tasks)
{
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ /* Use symbols from the runjob.so library provided by IBM.
+ * Do NOT use debugger symbols local to the srun command */
+#else
MPIR_proctable_size = num_tasks;
MPIR_proctable = xmalloc(sizeof(MPIR_PROCDESC) * num_tasks);
if (MPIR_proctable == NULL) {
error("Unable to initialize MPIR_proctable: %m");
exit(error_exit);
}
+#endif
}
extern void
mpir_cleanup(void)
{
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ /* Use symbols from the runjob.so library provided by IBM.
+ * Do NOT use debugger symbols local to the srun command */
+#else
int i;
for (i = 0; i < MPIR_proctable_size; i++) {
@@ -244,11 +264,16 @@
xfree(MPIR_proctable[i].executable_name);
}
xfree(MPIR_proctable);
+#endif
}
extern void
mpir_set_executable_names(const char *executable_name)
{
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ /* Use symbols from the runjob.so library provided by IBM.
+ * Do NOT use debugger symbols local to the srun command */
+#else
int i;
for (i = 0; i < MPIR_proctable_size; i++) {
@@ -259,11 +284,16 @@
exit(error_exit);
}
}
+#endif
}
extern void
-mpir_dump_proctable()
+mpir_dump_proctable(void)
{
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ /* Use symbols from the runjob.so library provided by IBM.
+ * Do NOT use debugger symbols local to the srun command */
+#else
MPIR_PROCDESC *tv;
int i;
@@ -274,6 +304,7 @@
info("task:%d, host:%s, pid:%d, executable:%s",
i, tv->host_name, tv->pid, tv->executable_name);
}
+#endif
}
static int
diff --git a/src/srun/opt.c b/src/srun/opt.c
index b3d9057..69f2c79 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -117,6 +117,7 @@
#define OPT_ACCTG_FREQ 0x15
#define OPT_WCKEY 0x16
#define OPT_SIGNAL 0x17
+#define OPT_TIME_VAL 0x18
/* generic getopt_long flags, integers and *not* valid characters */
#define LONG_OPT_HELP 0x100
@@ -183,6 +184,7 @@
#define LONG_OPT_GRES 0x151
#define LONG_OPT_ALPS 0x152
#define LONG_OPT_REQ_SWITCH 0x153
+#define LONG_OPT_RUNJOB_OPTS 0x154
extern char **environ;
@@ -193,10 +195,6 @@
int immediate_exit = 1;
/*---- forward declarations of static functions ----*/
-#if defined HAVE_BG_FILES && HAVE_BGQ
-static const char *runjob_loc = "/bgsys/drivers/ppcfloor/hlcs/bin/runjob";
-#endif
-
typedef struct env_vars env_vars_t;
@@ -456,6 +454,7 @@
opt.wckey = NULL;
opt.req_switch = -1;
opt.wait4switch = -1;
+ opt.runjob_opts = NULL;
}
/*---[ env var processing ]-----------------------------------------------*/
@@ -541,7 +540,7 @@
{"SLURM_WCKEY", OPT_STRING, &opt.wckey, NULL },
{"SLURM_WORKING_DIR", OPT_STRING, &opt.cwd, &opt.cwd_set },
{"SLURM_REQ_SWITCH", OPT_INT, &opt.req_switch, NULL },
-{"SLURM_WAIT4SWITCH", OPT_INT, &opt.wait4switch, NULL },
+{"SLURM_WAIT4SWITCH", OPT_TIME_VAL, NULL, NULL },
{NULL, 0, NULL, NULL}
};
@@ -689,6 +688,10 @@
}
break;
+ case OPT_TIME_VAL:
+ opt.wait4switch = time_str2secs(val);
+ break;
+
default:
/* do nothing */
break;
@@ -820,6 +823,7 @@
{"reservation", required_argument, 0, LONG_OPT_RESERVATION},
{"restart-dir", required_argument, 0, LONG_OPT_RESTART_DIR},
{"resv-ports", optional_argument, 0, LONG_OPT_RESV_PORTS},
+ {"runjob-opts", required_argument, 0, LONG_OPT_RUNJOB_OPTS},
{"signal", required_argument, 0, LONG_OPT_SIGNAL},
{"slurmd-debug", required_argument, 0, LONG_OPT_DEBUG_SLURMD},
{"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE},
@@ -920,7 +924,7 @@
exit(error_exit);
}
xfree(opt.efname);
- if (strncasecmp(optarg, "none", (size_t) 4) == 0)
+ if (strcasecmp(optarg, "none") == 0)
opt.efname = xstrdup("/dev/null");
else
opt.efname = xstrdup(optarg);
@@ -942,7 +946,7 @@
exit(error_exit);
}
xfree(opt.ifname);
- if (strncasecmp(optarg, "none", (size_t) 4) == 0)
+ if (strcasecmp(optarg, "none") == 0)
opt.ifname = xstrdup("/dev/null");
else
opt.ifname = xstrdup(optarg);
@@ -1012,7 +1016,7 @@
exit(error_exit);
}
xfree(opt.ofname);
- if (strncasecmp(optarg, "none", (size_t) 4) == 0)
+ if (strcasecmp(optarg, "none") == 0)
opt.ofname = xstrdup("/dev/null");
else
opt.ofname = xstrdup(optarg);
@@ -1222,7 +1226,13 @@
/* make other parameters look like debugger
* is really attached */
opt.parallel_debug = true;
+#if defined HAVE_BG_FILES && !defined HAVE_BGL && !defined HAVE_BGP
+ /* Use symbols from the runjob.so library provided by
+ * IBM. Do NOT use debugger symbols local to the srun
+ * command */
+#else
MPIR_being_debugged = 1;
+#endif
opt.max_launch_time = 120;
opt.max_threads = 1;
pmi_server_max_threads(opt.max_threads);
@@ -1386,6 +1396,13 @@
opt.ramdiskimage = xstrdup(optarg);
break;
case LONG_OPT_REBOOT:
+#if defined HAVE_BG && !defined HAVE_BG_L_P
+ info("WARNING: If your job is smaller than the block "
+ "it is going to run on and other jobs are "
+ "running on it the --reboot option will not be "
+ "honored. If this is the case, contact your "
+ "admin to reboot the block for you.");
+#endif
opt.reboot = true;
break;
case LONG_OPT_GET_USER_ENV:
@@ -1440,6 +1457,10 @@
xfree(opt.reservation);
opt.reservation = xstrdup(optarg);
break;
+ case LONG_OPT_RUNJOB_OPTS:
+ xfree(opt.runjob_opts);
+ opt.runjob_opts = xstrdup(optarg);
+ break;
case LONG_OPT_CHECKPOINT_DIR:
xfree(opt.ckpt_dir);
opt.ckpt_dir = xstrdup(optarg);
@@ -1477,7 +1498,7 @@
if (pos_delimit != NULL) {
pos_delimit[0] = '\0';
pos_delimit++;
- opt.wait4switch = time_str2mins(pos_delimit) * 60;
+ opt.wait4switch = time_str2secs(pos_delimit);
}
opt.req_switch = _get_int(optarg, "switches",
true);
@@ -1527,12 +1548,142 @@
data_read += i;
}
close(config_fd);
- for (i=1; i<*argc; i++)
- xfree(argv[i]);
+
+ for (i = *argc+1; i > 1; i--)
+ argv[i] = argv[i-1];
argv[1] = data_buf;
- *argc = 2;
+ *argc += 1;
}
+#if defined HAVE_BG && !defined HAVE_BG_L_P
+static bool _check_is_pow_of_2(int32_t n) {
+ /* Bitwise ANDing a power of 2 number like 16 with its
+ * negative (-16) gives itself back. Only integers which are power of
+ * 2 behave like that.
+ */
+ return ((n!=0) && (n&(-n))==n);
+}
+
+extern void bg_figure_nodes_tasks()
+{
+ /* A bit of setup for IBM's runjob. runjob only has so many
+ options, so it isn't that bad.
+ */
+ int32_t node_cnt;
+ if (opt.max_nodes)
+ node_cnt = opt.max_nodes;
+ else
+ node_cnt = opt.min_nodes;
+
+ if (!opt.ntasks_set) {
+ if (opt.ntasks_per_node != NO_VAL)
+ opt.ntasks = node_cnt * opt.ntasks_per_node;
+ else {
+ opt.ntasks = node_cnt;
+ opt.ntasks_per_node = 1;
+ }
+ opt.ntasks_set = true;
+ } else {
+ int32_t ntpn;
+ bool figured = false;
+
+ if (opt.nodes_set) {
+ if (node_cnt > opt.ntasks) {
+ if (opt.nodes_set_opt)
+ info("You asked for %d nodes, "
+ "but only %d tasks, resetting "
+ "node count to %u.",
+ node_cnt, opt.ntasks, opt.ntasks);
+ opt.max_nodes = opt.min_nodes = node_cnt
+ = opt.ntasks;
+ }
+ }
+ /* If nodes not set do not try to set min/max nodes
+ yet since that would result in an incorrect
+ allocation. For a step allocation it is figured
+ out later in srun_job.c _job_create_structure().
+ */
+
+ if (!opt.ntasks_per_node || (opt.ntasks_per_node == NO_VAL)) {
+ /* We always want the next larger number if
+ there is a fraction so we try to stay in
+ the allocation requested.
+ */
+ opt.ntasks_per_node =
+ (opt.ntasks + node_cnt - 1) / node_cnt;
+ figured = true;
+ }
+
+ /* On a Q we need ntasks_per_node to be a multiple of 2 */
+ ntpn = opt.ntasks_per_node;
+ while (!_check_is_pow_of_2(ntpn))
+ ntpn++;
+ if (!figured && (ntpn != opt.ntasks_per_node)) {
+ info("You requested --ntasks-per-node=%d, which is not "
+ "a power of 2. Setting --ntasks-per-node=%d "
+ "for you.", opt.ntasks_per_node, ntpn);
+ figured = true;
+ }
+ opt.ntasks_per_node = ntpn;
+
+ ntpn = opt.ntasks / opt.ntasks_per_node;
+ /* Make sure we are requesting the correct number of nodes. */
+ if (node_cnt < ntpn) {
+ opt.max_nodes = opt.min_nodes = ntpn;
+ if (opt.nodes_set && !figured) {
+ fatal("You requested -N %d and -n %d "
+ "with --ntasks-per-node=%d. "
+ "This isn't a valid request.",
+ node_cnt, opt.ntasks,
+ opt.ntasks_per_node);
+ }
+ node_cnt = opt.max_nodes;
+ }
+
+ /* Do this again to make sure we have a legitimate
+ ratio. */
+ ntpn = opt.ntasks_per_node;
+ if ((node_cnt * ntpn) < opt.ntasks) {
+ ntpn++;
+ while (!_check_is_pow_of_2(ntpn))
+ ntpn++;
+ if (!figured && (ntpn != opt.ntasks_per_node))
+ info("You requested --ntasks-per-node=%d, "
+ "which cannot spread across %d nodes "
+ "correctly. Setting --ntasks-per-node=%d "
+ "for you.",
+ opt.ntasks_per_node, node_cnt, ntpn);
+ opt.ntasks_per_node = ntpn;
+ }
+
+ if (opt.nodes_set) {
+ if ((opt.ntasks_per_node != 1)
+ && (opt.ntasks_per_node != 2)
+ && (opt.ntasks_per_node != 4)
+ && (opt.ntasks_per_node != 8)
+ && (opt.ntasks_per_node != 16)
+ && (opt.ntasks_per_node != 32)
+ && (opt.ntasks_per_node != 64))
+ fatal("You requested -N %d and -n %d "
+ "which gives --ntasks-per-node=%d. "
+ "This isn't a valid request.",
+ node_cnt, opt.ntasks,
+ opt.ntasks_per_node);
+ else if (!opt.overcommit
+ && ((opt.ntasks_per_node == 32)
+ || (opt.ntasks_per_node == 64)))
+ fatal("You requested -N %d and -n %d "
+ "which gives --ntasks-per-node=%d. "
+ "This isn't a valid request "
+ "without --overcommit.",
+ node_cnt, opt.ntasks,
+ opt.ntasks_per_node);
+ }
+ }
+}
+
+#endif
+
/*
* _opt_args() : set options via commandline args and popt
*/
@@ -1602,88 +1753,83 @@
while (rest[opt.argc] != NULL)
opt.argc++;
}
-#if defined HAVE_BGQ
- /* A bit of setup for IBM's runjob. runjob only has so many
- options, so it isn't that bad.
- */
- int32_t node_cnt;
- if (opt.max_nodes)
- node_cnt = opt.max_nodes;
- else
- node_cnt = opt.min_nodes;
- if (!opt.ntasks_set) {
- if (opt.ntasks_per_node != NO_VAL)
- opt.ntasks = node_cnt * opt.ntasks_per_node;
- else
- opt.ntasks = node_cnt;
- opt.ntasks_set = true;
- } else {
- if (opt.nodes_set) {
- if (node_cnt > opt.ntasks) {
- info("You asked for %d nodes, but only "
- "%d tasks, resetting node count to %u",
- node_cnt, opt.ntasks, opt.ntasks);
- opt.max_nodes = opt.min_nodes = node_cnt
- = opt.ntasks;
- }
- } else if (node_cnt > opt.ntasks)
- opt.max_nodes = opt.min_nodes = node_cnt = opt.ntasks;
+#if defined HAVE_BG && !defined HAVE_BG_L_P
- if (!opt.ntasks_per_node || (opt.ntasks_per_node == NO_VAL))
- opt.ntasks_per_node = opt.ntasks / node_cnt;
- else if ((opt.ntasks / opt.ntasks_per_node) != node_cnt)
- fatal("You are requesting for %d tasks, but are "
- "also asking for %d tasks per node and %d nodes.",
- opt.ntasks, opt.ntasks_per_node, node_cnt);
- }
+ bg_figure_nodes_tasks();
#if defined HAVE_BG_FILES
+ uint32_t taskid = NO_VAL;
if (!opt.test_only) {
/* Since we need the opt.argc to allocate the opt.argv array
* we need to do this before actually messing with
* things. All the extra options added to argv will be
* handled after the allocation. */
- /* Default location of the actual command to be ran. We always
- * have to add 3 options no matter what. */
- command_pos = 3;
+ /* We are always going to set ntasks_per_node and ntasks */
+ xassert(opt.ntasks_per_node != NO_VAL);
+ xassert(opt.ntasks_set);
- if (opt.ntasks_per_node != NO_VAL)
- command_pos += 2;
- if (opt.ntasks_set)
- command_pos += 2;
+ /* Default location of the actual command to be ran. We always
+ * have to add 5 options (calling prog, '-p', '--np',
+ * '--env-all' and ':') no matter what. */
+ command_pos = 7;
+
if (opt.cwd_set)
command_pos += 2;
if (opt.labelio)
command_pos += 2;
+ if (_verbose)
+ command_pos += 2;
+ if (opt.quiet)
+ command_pos += 2;
+ if (opt.ifname) {
+ if (!parse_uint32(opt.ifname, &taskid)
+ && ((int) taskid < opt.ntasks)) {
+ command_pos += 2;
+ }
+ }
+ if (opt.runjob_opts) {
+ char *save_ptr = NULL, *tok;
+ char *tmp = xstrdup(opt.runjob_opts);
+ tok = strtok_r(tmp, " ", &save_ptr);
+ while (tok) {
+ command_pos++;
+ tok = strtok_r(NULL, " ", &save_ptr);
+ }
+ xfree(tmp);
+ }
+
opt.argc += command_pos;
}
#endif
#endif
- opt.argv = (char **) xmalloc((opt.argc + 1) * sizeof(char *));
+ /* One extra pointer is for a trailing NULL and a
+ * second extra pointer is for arguments from the multi-prog file */
+ opt.argv = (char **) xmalloc((opt.argc + 2) * sizeof(char *));
-#if defined HAVE_BGQ
+#if defined HAVE_BG && !defined HAVE_BG_L_P
#if defined HAVE_BG_FILES
if (!opt.test_only) {
i = 0;
- /* Instead of running the actual job, the slurmstepd will be
- running runjob to run the job. srun is just wrapping it
- making things all kosher.
+ /* First arg has to be something when sending it to the
+ runjob api. This can be anything, srun seemed most
+ logical, but it doesn't matter.
*/
- opt.argv[i++] = xstrdup(runjob_loc);
- if (opt.ntasks_per_node != NO_VAL) {
- opt.argv[i++] = xstrdup("-p");
- opt.argv[i++] = xstrdup_printf("%d",
- opt.ntasks_per_node);
- }
+ opt.argv[i++] = xstrdup("srun");
+ /* srun launches tasks using runjob API. Slurmd is not used */
+ /* We are always going to set ntasks_per_node and ntasks */
+ // if (opt.ntasks_per_node != NO_VAL) {
+ opt.argv[i++] = xstrdup("-p");
+ opt.argv[i++] = xstrdup_printf("%d", opt.ntasks_per_node);
+ // }
- if (opt.ntasks_set) {
- opt.argv[i++] = xstrdup("--np");
- opt.argv[i++] = xstrdup_printf("%d", opt.ntasks);
- }
+ // if (opt.ntasks_set) {
+ opt.argv[i++] = xstrdup("--np");
+ opt.argv[i++] = xstrdup_printf("%d", opt.ntasks);
+ // }
if (opt.cwd_set) {
opt.argv[i++] = xstrdup("--cwd");
@@ -1699,9 +1845,35 @@
opt.labelio = 0;
}
+ if (_verbose) {
+ opt.argv[i++] = xstrdup("--verbose");
+ opt.argv[i++] = xstrdup_printf("%d", _verbose);
+ }
+
+ if (opt.quiet) {
+ opt.argv[i++] = xstrdup("--verbose");
+ opt.argv[i++] = xstrdup("OFF");
+ }
+
+ if (taskid != NO_VAL) {
+ opt.argv[i++] = xstrdup("--stdinrank");
+ opt.argv[i++] = xstrdup_printf("%u", taskid);
+ }
+
+ if (opt.runjob_opts) {
+ char *save_ptr = NULL, *tok;
+ char *tmp = xstrdup(opt.runjob_opts);
+ tok = strtok_r(tmp, " ", &save_ptr);
+ while (tok) {
+ opt.argv[i++] = xstrdup(tok);
+ tok = strtok_r(NULL, " ", &save_ptr);
+ }
+ xfree(tmp);
+ }
+
/* Export all the environment so the runjob_mux will get the
* correct info about the job, namely the block. */
- opt.argv[i++] = xstrdup("--env_all");
+ opt.argv[i++] = xstrdup("--env-all");
/* With runjob anything after a ':' is treated as the actual
* job, which in this case is exactly what it is. So, very
@@ -2416,7 +2588,13 @@
/* Determine if srun is under the control of a parallel debugger or not */
static bool _under_parallel_debugger (void)
{
+#if defined HAVE_BG_FILES && !defined HAVE_BGL && !defined HAVE_BGP
+ /* Use symbols from the runjob.so library provided by IBM.
+ * Do NOT use debugger symbols local to the srun command */
+ return false;
+#else
return (MPIR_being_debugged != 0);
+#endif
}
@@ -2438,7 +2616,12 @@
" [--ntasks-per-node=n] [--ntasks-per-socket=n] [reservation=name]\n"
" [--ntasks-per-core=n] [--mem-per-cpu=MB] [--preserve-env]\n"
#ifdef HAVE_BG /* Blue gene specific options */
-" [--geometry=XxYxZ] [--conn-type=type] [--no-rotate] [--reboot]\n"
+#ifdef HAVE_BG_L_P
+" [--geometry=XxYxZ] "
+#else
+" [--geometry=AxXxYxZ] "
+#endif
+"[--conn-type=type] [--no-rotate] [--reboot]\n"
#ifdef HAVE_BGL
" [--blrts-image=path] [--linux-image=path]\n"
" [--mloader-image=path] [--ramdisk-image=path]\n"
@@ -2446,6 +2629,9 @@
" [--cnload-image=path]\n"
" [--mloader-image=path] [--ioload-image=path]\n"
#endif
+#ifdef HAVE_BGQ
+" [--runjob-opts=options]\n"
+#endif
#endif
" [--mail-type=type] [--mail-user=user] [--nice[=value]]\n"
" [--prolog=fname] [--epilog=fname]\n"
@@ -2582,7 +2768,13 @@
#endif
#ifdef HAVE_BG /* Blue gene specific options */
"Blue Gene related options:\n"
+#ifdef HAVE_BG_L_P
" -g, --geometry=XxYxZ geometry constraints of the job\n"
+#else
+" -g, --geometry=AxXxYxZ Midplane geometry constraints of the job,\n"
+" sub-block allocations can not be allocated\n"
+" with the geometry option\n"
+#endif
" -R, --no-rotate disable geometry rotation\n"
" --reboot reboot block before starting job\n"
" --conn-type=type constraint on type of connection, MESH or TORUS\n"
@@ -2601,6 +2793,9 @@
" --mloader-image=path path to mloader image for bluegene block. Default if not set\n"
" --ramdisk-image=path path to ramdisk image for bluegene block. Default if not set\n"
#endif
+#ifdef HAVE_BGQ
+" --runjob-opts=options options for the runjob command\n"
+#endif
#endif
"\n"
"Help options:\n"
diff --git a/src/srun/opt.h b/src/srun/opt.h
index f0d0676..8ce0c24 100644
--- a/src/srun/opt.h
+++ b/src/srun/opt.h
@@ -225,6 +225,7 @@
int spank_job_env_size; /* size of spank_job_env */
int req_switch; /* Minimum number of switches */
int wait4switch; /* Maximum time to wait for minimum switches */
+ char *runjob_opts; /* Runjob command options, BGQ only */
} opt_t;
extern opt_t opt;
@@ -264,4 +265,8 @@
* via salloc or sbatch commands */
extern void init_spank_env(void);
+#if defined HAVE_BG && !defined HAVE_BG_L_P
+extern void bg_figure_nodes_tasks(void);
+#endif
+
#endif /* _HAVE_OPT_H */
diff --git a/src/srun/runjob_interface.cc b/src/srun/runjob_interface.cc
new file mode 100644
index 0000000..059a458
--- /dev/null
+++ b/src/srun/runjob_interface.cc
@@ -0,0 +1,78 @@
+/*****************************************************************************\
+ * runjob_interface.cc
+ *
+ *****************************************************************************
+ * Copyright (C) 2011 SchedMD LLC
+ * Written by Danny Auble <da@schedmd.com>
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+extern "C" {
+#include "runjob_interface.h"
+}
+
+#ifdef HAVE_BG_FILES
+
+#include <bgsched/runjob/Client.h>
+
+#include <iostream>
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+static bgsched::runjob::Client *rj_client_ptr = NULL;
+
+
+extern int runjob_launch(int argc, char **argv,
+ int input, int output, int error)
+{
+ try {
+ rj_client_ptr = new(bgsched::runjob::Client)(argc, argv);
+ return rj_client_ptr->start(input, output, error);
+ } catch (const std::exception& e) {
+ std::cerr << "could not runjob: " << e.what() << std::endl;
+ return -1;
+ }
+}
+
+extern void runjob_signal(int signal)
+{
+ if (rj_client_ptr) {
+ try {
+ rj_client_ptr->kill(signal);
+ } catch (const std::exception& e) {
+			std::cerr << "could not send signal " << signal
+				  << " to job: " << e.what() << std::endl;
+ }
+ }
+}
+
+#endif
diff --git a/src/srun/runjob_interface.h b/src/srun/runjob_interface.h
new file mode 100644
index 0000000..e151a14
--- /dev/null
+++ b/src/srun/runjob_interface.h
@@ -0,0 +1,54 @@
+/*****************************************************************************\
+ * runjob_interface.h
+ *
+ *****************************************************************************
+ * Copyright (C) 2011 SchedMD LLC
+ * Written by Danny Auble <da@schedmd.com>
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * In addition, as a special exception, the copyright holders give permission
+ * to link the code of portions of this program with the OpenSSL library under
+ * certain conditions as described in each individual source file, and
+ * distribute linked combinations including the two. You must obey the GNU
+ * General Public License in all respects for all of the code used other than
+ * OpenSSL. If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not obligated to do
+ * so. If you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files in
+ * the program, then also delete it here.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#ifndef _RUNJOB_INTERFACE_H_
+#define _RUNJOB_INTERFACE_H_
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifdef HAVE_BG_FILES
+
+extern int runjob_launch(int argc, char **argv,
+ int input, int output, int error);
+
+extern void runjob_signal(int signal);
+
+#endif
+
+#endif
diff --git a/src/srun/srun.c b/src/srun/srun.c
index ce567e8..0418795 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -71,21 +71,22 @@
#include <unistd.h>
#include <grp.h>
-
#include "src/common/fd.h"
#include "src/common/hostlist.h"
#include "src/common/log.h"
+#include "src/common/mpi.h"
+#include "src/common/net.h"
+#include "src/common/plugstack.h"
+#include "src/common/read_config.h"
+#include "src/common/slurm_auth.h"
+#include "src/common/slurm_jobacct_gather.h"
#include "src/common/slurm_protocol_api.h"
+#include "src/common/slurm_rlimits_info.h"
#include "src/common/switch.h"
+#include "src/common/uid.h"
#include "src/common/xmalloc.h"
#include "src/common/xsignal.h"
#include "src/common/xstring.h"
-#include "src/common/net.h"
-#include "src/common/mpi.h"
-#include "src/common/slurm_rlimits_info.h"
-#include "src/common/plugstack.h"
-#include "src/common/read_config.h"
-#include "src/common/uid.h"
#include "src/srun/allocate.h"
#include "src/srun/srun_job.h"
@@ -96,8 +97,13 @@
#include "src/srun/multi_prog.h"
#include "src/srun/task_state.h"
#include "src/api/pmi_server.h"
+#include "src/api/step_ctx.h"
#include "src/api/step_launch.h"
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+#include "src/srun/runjob_interface.h"
+#endif
+
#if defined (HAVE_DECL_STRSIGNAL) && !HAVE_DECL_STRSIGNAL
# ifndef strsignal
extern char *strsignal(int);
@@ -149,20 +155,28 @@
static void _run_srun_prolog (srun_job_t *job);
static void _run_srun_epilog (srun_job_t *job);
static int _run_srun_script (srun_job_t *job, char *script);
-static void _set_cpu_env_var(resource_allocation_response_msg_t *resp);
+static void _set_env_vars(resource_allocation_response_msg_t *resp);
static void _set_exit_code(void);
+static void _set_node_alias(void);
static void _step_opt_exclusive(void);
static void _set_stdio_fds(srun_job_t *job, slurm_step_io_fds_t *cio_fds);
static void _set_submit_dir_env(void);
static void _set_prio_process_env(void);
static int _set_rlimit_env(void);
static int _set_umask_env(void);
+static void _shepard_notify(int shepard_fd);
+static int _shepard_spawn(srun_job_t *job, bool got_alloc);
static int _slurm_debug_env_val (void);
static void *_srun_signal_mgr(void *no_data);
-static void _task_start(launch_tasks_response_msg_t *msg);
-static void _task_finish(task_exit_msg_t *msg);
static char *_uint16_array_to_str(int count, const uint16_t *array);
static int _validate_relative(resource_allocation_response_msg_t *resp);
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+static void _send_step_complete_rpc(int step_rc);
+static pthread_t _spawn_msg_handler(void);
+#else
+static void _task_start(launch_tasks_response_msg_t *msg);
+static void _task_finish(task_exit_msg_t *msg);
+#endif
/*
* from libvirt-0.6.2 GPL2
@@ -190,11 +204,17 @@
int debug_level;
env_t *env = xmalloc(sizeof(env_t));
log_options_t logopt = LOG_OPTS_STDERR_ONLY;
- slurm_step_launch_params_t launch_params;
- slurm_step_launch_callbacks_t callbacks;
pthread_attr_t thread_attr;
pthread_t signal_thread = (pthread_t) 0;
- int got_alloc = 0;
+ bool got_alloc = false;
+ int shepard_fd = -1;
+#if !defined HAVE_BG_FILES || defined HAVE_BG_L_P
+ slurm_step_launch_params_t launch_params;
+ slurm_step_launch_callbacks_t callbacks;
+#else
+ slurm_step_io_fds_t cio_fds;
+ pthread_t msg_thread = (pthread_t) 0;
+#endif
env->stepid = -1;
env->procid = -1;
@@ -209,6 +229,9 @@
log_init(xbasename(av[0]), logopt, 0, NULL);
_set_exit_code();
+ if (slurm_select_init(1) != SLURM_SUCCESS )
+ fatal( "failed to initialize node selection plugin" );
+
/* This must happen before we spawn any threads
* which are not designed to handle them */
if (xsignal_block(sig_array) < 0)
@@ -309,7 +332,7 @@
opt.alloc_nodelist = xstrdup(resp->node_list);
if (opt.exclusive)
_step_opt_exclusive();
- _set_cpu_env_var(resp);
+ _set_env_vars(resp);
if (_validate_relative(resp))
exit(error_exit);
job = job_step_create_allocation(resp);
@@ -323,7 +346,7 @@
exit(error_exit);
} else {
/* Combined job allocation and job step launch */
-#if defined HAVE_FRONT_END && (!defined HAVE_BGQ || !defined HAVE_BG_FILES)
+#if defined HAVE_FRONT_END && (!defined HAVE_BG || defined HAVE_BG_L_P || !defined HAVE_BG_FILES)
uid_t my_uid = getuid();
if ((my_uid != 0) &&
(my_uid != slurm_get_slurm_user_id())) {
@@ -343,9 +366,9 @@
if ( !(resp = allocate_nodes()) )
exit(error_exit);
- got_alloc = 1;
+ got_alloc = true;
_print_job_information(resp);
- _set_cpu_env_var(resp);
+ _set_env_vars(resp);
if (_validate_relative(resp)) {
slurm_complete_job(resp->job_id, 1);
exit(error_exit);
@@ -354,6 +377,7 @@
opt.exclusive = false; /* not applicable for this step */
opt.time_limit = NO_VAL;/* not applicable for step, only job */
+ xfree(opt.constraints); /* not applicable for this step */
if (!opt.job_name_set_cmd && opt.job_name_set_env) {
/* use SLURM_JOB_NAME env var */
opt.job_name_set_cmd = true;
@@ -380,6 +404,12 @@
info("Warning: Unable to assume uid=%u", opt.uid);
/*
+	 * Spawn process to ensure clean-up of job and/or step on abnormal
+ * termination
+ */
+ shepard_fd = _shepard_spawn(job, got_alloc);
+
+ /*
* Enhance environment for job
*/
if (opt.cpus_set)
@@ -439,16 +469,43 @@
setup_env(env, opt.preserve_env);
xfree(env->task_count);
xfree(env);
+ _set_node_alias();
- re_launch:
-#if defined HAVE_BGQ
-//#if defined HAVE_BGQ && defined HAVE_BG_FILES
- task_state = task_state_create(1);
+ if (!signal_thread) {
+ slurm_attr_init(&thread_attr);
+ while (pthread_create(&signal_thread, &thread_attr,
+ _srun_signal_mgr, NULL)) {
+ error("pthread_create error %m");
+ sleep(1);
+ }
+ slurm_attr_destroy(&thread_attr);
+ }
+
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ _run_srun_prolog(job);
+ if (_call_spank_local_user (job) < 0) {
+ error("Failure in local plugin stack");
+ exit(error_exit);
+ }
+ memset(&cio_fds, 0, sizeof(slurm_step_io_fds_t));
+ _set_stdio_fds(job, &cio_fds);
+ msg_thread = _spawn_msg_handler();
+ global_rc = runjob_launch(opt.argc, opt.argv,
+ cio_fds.in.fd,
+ cio_fds.out.fd,
+ cio_fds.err.fd);
+ _send_step_complete_rpc(global_rc);
+ if (msg_thread) {
+ srun_shutdown = true;
+ pthread_cancel(msg_thread);
+ pthread_join(msg_thread, NULL);
+ }
#else
+ re_launch:
task_state = task_state_create(opt.ntasks);
-#endif
slurm_step_launch_params_t_init(&launch_params);
launch_params.gid = opt.gid;
+ launch_params.alias_list = job->alias_list;
launch_params.argc = opt.argc;
launch_params.argv = opt.argv;
launch_params.multi_prog = opt.multi_prog ? true : false;
@@ -479,16 +536,6 @@
launch_params.preserve_env = opt.preserve_env;
launch_params.spank_job_env = opt.spank_job_env;
launch_params.spank_job_env_size = opt.spank_job_env_size;
- /* job structure should now be filled in */
- if (!signal_thread) {
- slurm_attr_init(&thread_attr);
- while (pthread_create(&signal_thread, &thread_attr,
- _srun_signal_mgr, NULL)) {
- error("pthread_create error %m");
- sleep(1);
- }
- slurm_attr_destroy(&thread_attr);
- }
_set_stdio_fds(job, &launch_params.local_fds);
@@ -517,6 +564,7 @@
SLURM_SUCCESS) {
error("Application launch failed: %m");
global_rc = 1;
+ slurm_step_launch_abort(job->step_ctx);
slurm_step_launch_wait_finish(job->step_ctx);
goto cleanup;
}
@@ -542,7 +590,7 @@
}
slurm_step_launch_wait_finish(job->step_ctx);
- if ((MPIR_being_debugged == 0) && retry_step_begin &&
+ if ((MPIR_being_debugged == 0) && retry_step_begin &&
(retry_step_cnt < MAX_STEP_RETRIES)) {
retry_step_begin = false;
slurm_step_ctx_destroy(job->step_ctx);
@@ -556,8 +604,9 @@
task_state_destroy(task_state);
goto re_launch;
}
-
cleanup:
+#endif
+
if (got_alloc) {
cleanup_allocation();
@@ -567,6 +616,7 @@
else
slurm_complete_job(job->jobid, global_rc);
}
+ _shepard_notify(shepard_fd);
if (signal_thread) {
srun_shutdown = true;
@@ -783,19 +833,30 @@
}
}
-static void _set_cpu_env_var(resource_allocation_response_msg_t *resp)
+static void _set_env_vars(resource_allocation_response_msg_t *resp)
{
char *tmp;
- if (getenv("SLURM_JOB_CPUS_PER_NODE"))
- return;
+ if (!getenv("SLURM_JOB_CPUS_PER_NODE")) {
+ tmp = uint32_compressed_to_str(resp->num_cpu_groups,
+ resp->cpus_per_node,
+ resp->cpu_count_reps);
+ if (setenvf(NULL, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp) < 0) {
+ error("unable to set SLURM_JOB_CPUS_PER_NODE in "
+ "environment");
+ }
+ xfree(tmp);
+ }
- tmp = uint32_compressed_to_str(resp->num_cpu_groups,
- resp->cpus_per_node,
- resp->cpu_count_reps);
- if (setenvf(NULL, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp) < 0)
- error("unable to set SLURM_JOB_CPUS_PER_NODE in environment");
- xfree(tmp);
+ if (resp->alias_list) {
+ if (setenv("SLURM_NODE_ALIASES", resp->alias_list, 1) < 0) {
+ error("unable to set SLURM_NODE_ALIASES in "
+ "environment");
+ }
+ } else {
+ unsetenv("SLURM_NODE_ALIASES");
+ }
+
return;
}
@@ -879,19 +940,48 @@
return rc;
}
+static void _set_node_alias(void)
+{
+ char *aliases, *save_ptr = NULL, *tmp;
+ char *addr, *hostname, *slurm_name;
+
+ tmp = getenv("SLURM_NODE_ALIASES");
+ if (!tmp)
+ return;
+ aliases = xstrdup(tmp);
+ slurm_name = strtok_r(aliases, ":", &save_ptr);
+ while (slurm_name) {
+ addr = strtok_r(NULL, ":", &save_ptr);
+ if (!addr)
+ break;
+ slurm_reset_alias(slurm_name, addr, addr);
+ hostname = strtok_r(NULL, ",", &save_ptr);
+ if (!hostname)
+ break;
+ slurm_name = strtok_r(NULL, ":", &save_ptr);
+ }
+ xfree(aliases);
+}
+
static int _become_user (void)
{
char *user = uid_to_string(opt.uid);
gid_t gid = gid_from_uid(opt.uid);
- if (strcmp(user, "nobody") == 0)
+ if (strcmp(user, "nobody") == 0) {
+ xfree(user);
return (error ("Invalid user id %u: %m", opt.uid));
+ }
- if (opt.uid == getuid ())
+ if (opt.uid == getuid ()) {
+ xfree(user);
return (0);
+ }
- if ((opt.egid != (gid_t) -1) && (setgid (opt.egid) < 0))
+ if ((opt.egid != (gid_t) -1) && (setgid (opt.egid) < 0)) {
+ xfree(user);
return (error ("setgid: %m"));
+ }
initgroups (user, gid); /* Ignore errors */
xfree(user);
@@ -970,6 +1060,138 @@
/* NOTREACHED */
}
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+static void
+_send_step_complete_rpc(int step_rc)
+{
+ slurm_msg_t req;
+ step_complete_msg_t msg;
+ int rc;
+
+ memset(&msg, 0, sizeof(step_complete_msg_t));
+ msg.job_id = job->jobid;
+ msg.job_step_id = job->stepid;
+ msg.range_first = 0;
+ msg.range_last = 0;
+ msg.step_rc = step_rc;
+ msg.jobacct = jobacct_gather_g_create(NULL);
+
+ slurm_msg_t_init(&req);
+ req.msg_type = REQUEST_STEP_COMPLETE;
+ req.data = &msg;
+/* req.address = step_complete.parent_addr; */
+
+ debug3("Sending step complete RPC to slurmctld");
+ if (slurm_send_recv_controller_rc_msg(&req, &rc) < 0)
+ error("Error sending step complete RPC to slurmctld");
+ jobacct_gather_g_destroy(msg.jobacct);
+}
+
+static void
+_handle_msg(slurm_msg_t *msg)
+{
+ static uint32_t slurm_uid = NO_VAL;
+ uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
+ uid_t uid = getuid();
+ job_step_kill_msg_t *ss;
+ srun_user_msg_t *um;
+
+ if (slurm_uid == NO_VAL)
+ slurm_uid = slurm_get_slurm_user_id();
+ if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) {
+ error ("Security violation, slurm message from uid %u",
+ (unsigned int) req_uid);
+ return;
+ }
+
+ switch (msg->msg_type) {
+ case SRUN_PING:
+ debug3("slurmctld ping received");
+ slurm_send_rc_msg(msg, SLURM_SUCCESS);
+ slurm_free_srun_ping_msg(msg->data);
+ break;
+ case SRUN_JOB_COMPLETE:
+ debug("received job step complete message");
+ slurm_free_srun_job_complete_msg(msg->data);
+ runjob_signal(SIGKILL);
+ break;
+ case SRUN_USER_MSG:
+ um = msg->data;
+ info("%s", um->msg);
+ slurm_free_srun_user_msg(msg->data);
+ break;
+ case SRUN_TIMEOUT:
+ um = msg->data;
+ debug("received job step timeout message");
+ runjob_signal(SIGKILL);
+ slurm_free_srun_timeout_msg(msg->data);
+ break;
+ case SRUN_STEP_SIGNAL:
+ ss = msg->data;
+ debug("received step signal %u RPC", ss->signal);
+ runjob_signal(ss->signal);
+ slurm_free_job_step_kill_msg(msg->data);
+ break;
+ default:
+ debug("received spurious message type: %u",
+ msg->msg_type);
+ break;
+ }
+ return;
+}
+
+static void *_msg_thr_internal(void *arg)
+{
+ slurm_addr_t cli_addr;
+ slurm_fd_t newsockfd;
+ slurm_msg_t *msg;
+ int *slurmctld_fd_ptr = (int *)arg;
+
+ (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+ (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
+
+ while (!srun_shutdown) {
+ newsockfd = slurm_accept_msg_conn(*slurmctld_fd_ptr, &cli_addr);
+ if (newsockfd == SLURM_SOCKET_ERROR) {
+ if (errno != EINTR)
+ error("slurm_accept_msg_conn: %m");
+ continue;
+ }
+ msg = xmalloc(sizeof(slurm_msg_t));
+ if (slurm_receive_msg(newsockfd, msg, 0) != 0) {
+ error("slurm_receive_msg: %m");
+ /* close the new socket */
+ slurm_close_accepted_conn(newsockfd);
+ continue;
+ }
+ _handle_msg(msg);
+ slurm_free_msg(msg);
+ slurm_close_accepted_conn(newsockfd);
+ }
+ return NULL;
+}
+
+static pthread_t
+_spawn_msg_handler(void)
+{
+ pthread_attr_t attr;
+ pthread_t msg_thread;
+ static int slurmctld_fd;
+
+ slurmctld_fd = job->step_ctx->launch_state->slurmctld_socket_fd;
+ if (slurmctld_fd < 0)
+ return (pthread_t) 0;
+ job->step_ctx->launch_state->slurmctld_socket_fd = -1;
+
+ slurm_attr_init(&attr);
+ if (pthread_create(&msg_thread, &attr, _msg_thr_internal,
+ (void *) &slurmctld_fd))
+ error("pthread_create of message thread: %m");
+ slurm_attr_destroy(&attr);
+ return msg_thread;
+}
+#endif
+
static int
_is_local_file (fname_t *fname)
{
@@ -1105,13 +1327,29 @@
}
static void
+_terminate_job_step(slurm_step_ctx_t *step_ctx)
+{
+ uint32_t job_id, step_id;
+
+ slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_JOBID, &job_id);
+ slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_STEPID, &step_id);
+ info("Terminating job step %u.%u", job_id, step_id);
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ runjob_signal(SIGKILL);
+#else
+ slurm_kill_job_step(job_id, step_id, SIGKILL);
+#endif
+}
+
+#if !defined HAVE_BG_FILES || defined HAVE_BG_L_P
+static void
_task_start(launch_tasks_response_msg_t *msg)
{
MPIR_PROCDESC *table;
int taskid;
int i;
- if(msg->count_of_pids)
+ if (msg->count_of_pids)
verbose("Node %s, %d tasks started",
msg->node_name, msg->count_of_pids);
else
@@ -1138,18 +1376,6 @@
}
-static void
-_terminate_job_step(slurm_step_ctx_t *step_ctx)
-{
- uint32_t job_id, step_id;
-
- slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_JOBID, &job_id);
- slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_STEPID, &step_id);
- info("Terminating job step %u.%u", job_id, step_id);
- update_job_state(job, SRUN_JOB_CANCELLED);
- slurm_kill_job_step(job_id, step_id, SIGKILL);
-}
-
static char *
_hostset_to_string(hostset_t hs)
{
@@ -1360,18 +1586,36 @@
if (task_state_first_exit(task_state) && (opt.max_wait > 0))
_setup_max_wait_timer();
}
+#endif
+
+/* Return the number of microseconds between tv1 and tv2 with a maximum
+ * value of 10,000,000 to prevent overflows */
+static long _diff_tv_str(struct timeval *tv1,struct timeval *tv2)
+{
+ long delta_t;
+
+ delta_t = MIN((tv2->tv_sec - tv1->tv_sec), 10);
+ delta_t *= 1000000;
+ delta_t += tv2->tv_usec - tv1->tv_usec;
+ return delta_t;
+}
static void _handle_intr(void)
{
- static time_t last_intr = 0;
- static time_t last_intr_sent = 0;
- time_t now = time(NULL);
+ static struct timeval last_intr = { 0, 0 };
+ static struct timeval last_intr_sent = { 0, 0 };
+ struct timeval now;
- if (!opt.quit_on_intr && ((now - last_intr) > 1)) {
+ gettimeofday(&now, NULL);
+ if (!opt.quit_on_intr && (_diff_tv_str(&last_intr, &now) > 1000000)) {
if (opt.disable_status) {
info("sending Ctrl-C to job %u.%u",
job->jobid, job->stepid);
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ runjob_signal(SIGINT);
+#else
slurm_step_launch_fwd_signal(job->step_ctx, SIGINT);
+#endif
} else if (job->state < SRUN_JOB_FORCETERM) {
info("interrupt (one more within 1 sec to abort)");
task_state_print(task_state, (log_f) info);
@@ -1379,29 +1623,40 @@
info("interrupt (abort already in progress)");
task_state_print(task_state, (log_f) info);
}
- last_intr = time(NULL);
+ last_intr = now;
} else { /* second Ctrl-C in half as many seconds */
update_job_state(job, SRUN_JOB_CANCELLED);
/* terminate job */
if (job->state < SRUN_JOB_FORCETERM) {
- if ((now - last_intr_sent) < 1) {
+ if (_diff_tv_str(&last_intr_sent, &now) < 1000000) {
job_force_termination(job);
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ runjob_signal(SIGKILL);
+#else
slurm_step_launch_abort(job->step_ctx);
+#endif
return;
}
info("sending Ctrl-C to job %u.%u",
job->jobid, job->stepid);
last_intr_sent = now;
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ runjob_signal(SIGKILL);
+#else
slurm_step_launch_fwd_signal(job->step_ctx, SIGINT);
slurm_step_launch_abort(job->step_ctx);
+#endif
} else {
job_force_termination(job);
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ runjob_signal(SIGKILL);
+#else
slurm_step_launch_abort(job->step_ctx);
+#endif
}
}
}
-
static void _default_sigaction(int sig)
{
struct sigaction act;
@@ -1424,7 +1679,11 @@
if (ending)
return;
ending = 1;
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ runjob_signal(SIGKILL);
+#else
slurm_step_launch_abort(job->step_ctx);
+#endif
}
/* _srun_signal_mgr - Process daemon-wide signals */
@@ -1456,7 +1715,11 @@
* are ending the job now and we don't need to update
* the state. */
info("forcing job termination");
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ runjob_signal(SIGKILL);
+#else
slurm_step_launch_abort(job->step_ctx);
+#endif
break;
case SIGCONT:
info("got SIGCONT");
@@ -1472,9 +1735,72 @@
}
break;
default:
+#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
+ runjob_signal(sig);
+#else
slurm_step_launch_fwd_signal(job->step_ctx, sig);
+#endif
break;
}
}
return NULL;
}
+
+static void _shepard_notify(int shepard_fd)
+{
+ int rc;
+
+ while (1) {
+ rc = write(shepard_fd, "", 1);
+ if (rc == -1) {
+ if ((errno == EAGAIN) || (errno == EINTR))
+ continue;
+ error("write(shepard): %m");
+ }
+ break;
+ }
+ close(shepard_fd);
+}
+
+static int _shepard_spawn(srun_job_t *job, bool got_alloc)
+{
+ int shepard_pipe[2], rc;
+ pid_t shepard_pid;
+ char buf[1];
+
+ if (pipe(shepard_pipe)) {
+ error("pipe: %m");
+ return -1;
+ }
+
+ shepard_pid = fork();
+ if (shepard_pid == -1) {
+ error("fork: %m");
+ return -1;
+ }
+ if (shepard_pid != 0) {
+ close(shepard_pipe[0]);
+ return shepard_pipe[1];
+ }
+
+ /* Wait for parent to notify of completion or I/O error on abort */
+ close(shepard_pipe[1]);
+ while (1) {
+ rc = read(shepard_pipe[0], buf, 1);
+ if (rc == 1) {
+ exit(0);
+ } else if (rc == 0) {
+ break; /* EOF */
+ } else if (rc == -1) {
+ if ((errno == EAGAIN) || (errno == EINTR))
+ continue;
+ break;
+ }
+ }
+
+ (void) slurm_terminate_job_step(job->jobid, job->stepid);
+ if (got_alloc)
+ slurm_complete_job(job->jobid, NO_VAL);
+ exit(0);
+ return -1;
+}
diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c
index 1e50f7e..554b1f8 100644
--- a/src/srun/srun_job.c
+++ b/src/srun/srun_job.c
@@ -73,14 +73,15 @@
* about node allocation to be passed to _job_create_structure()
*/
typedef struct allocation_info {
- uint32_t jobid;
- uint32_t stepid;
- char *nodelist;
- uint32_t nnodes;
- uint32_t num_cpu_groups;
+ char *alias_list;
uint16_t *cpus_per_node;
uint32_t *cpu_count_reps;
+ uint32_t jobid;
+ uint32_t nnodes;
+ char *nodelist;
+ uint32_t num_cpu_groups;
dynamic_plugin_data_t *select_jobinfo;
+ uint32_t stepid;
} allocation_info_t;
/*
@@ -101,7 +102,7 @@
job_create_noalloc(void)
{
srun_job_t *job = NULL;
- allocation_info_t *ai = xmalloc(sizeof(*ai));
+ allocation_info_t *ai = xmalloc(sizeof(allocation_info_t));
uint16_t cpn = 1;
hostlist_t hl = hostlist_create(opt.nodelist);
@@ -145,15 +146,17 @@
{
uint32_t job_id = resp->job_id;
srun_job_t *job = NULL;
- allocation_info_t *ai = xmalloc(sizeof(*ai));
+ allocation_info_t *ai = xmalloc(sizeof(allocation_info_t));
hostlist_t hl = NULL;
char *buf = NULL;
int count = 0;
uint32_t alloc_count = 0;
+ char *step_nodelist = NULL;
ai->jobid = job_id;
ai->stepid = NO_VAL;
- ai->nodelist = opt.alloc_nodelist;
+ ai->alias_list = resp->alias_list;
+ ai->nodelist = opt.alloc_nodelist;
hl = hostlist_create(ai->nodelist);
hostlist_uniq(hl);
alloc_count = hostlist_count(hl);
@@ -178,7 +181,7 @@
hostlist_delete_nth(hl, inx);
ai->nnodes--; /* decrement node count */
}
- if(inc_hl) {
+ if (inc_hl) {
inx = hostlist_find(inc_hl, node_name);
if (inx >= 0) {
error("Requested node %s is also "
@@ -211,20 +214,20 @@
opt.min_nodes = ai->nnodes;
opt.nodes_set = true;
}
- if(!opt.max_nodes)
+ if (!opt.max_nodes)
opt.max_nodes = opt.min_nodes;
- if((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
+ if ((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
ai->nnodes = opt.max_nodes;
count = hostlist_count(hl);
- if(!count) {
+ if (!count) {
error("Hostlist is now nothing! Can't run job.");
hostlist_destroy(hl);
goto error;
}
- if(inc_hl) {
+ if (inc_hl) {
count = hostlist_count(inc_hl);
- if(count < ai->nnodes) {
+ if (count < ai->nnodes) {
/* add more nodes to get correct number for
allocation */
hostlist_t tmp_hl = hostlist_copy(hl);
@@ -264,15 +267,15 @@
* know it is less than the number of nodes
* in the allocation
*/
- if(opt.ntasks_set && (opt.ntasks < ai->nnodes))
+ if (opt.ntasks_set && (opt.ntasks < ai->nnodes))
opt.min_nodes = opt.ntasks;
else
opt.min_nodes = ai->nnodes;
opt.nodes_set = true;
}
- if(!opt.max_nodes)
+ if (!opt.max_nodes)
opt.max_nodes = opt.min_nodes;
- if((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
+ if ((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
ai->nnodes = opt.max_nodes;
/* Don't reset the ai->nodelist because that is the
* nodelist we want to say the allocation is under
@@ -283,8 +286,12 @@
}
/* get the correct number of hosts to run tasks on */
- if (opt.nodelist) {
- hl = hostlist_create(opt.nodelist);
+ if (opt.nodelist)
+ step_nodelist = opt.nodelist;
+ else if ((opt.distribution == SLURM_DIST_ARBITRARY) && (count == 0))
+ step_nodelist = getenv("SLURM_ARBITRARY_NODELIST");
+ if (step_nodelist) {
+ hl = hostlist_create(step_nodelist);
if (opt.distribution != SLURM_DIST_ARBITRARY)
hostlist_uniq(hl);
if (!hostlist_count(hl)) {
@@ -306,12 +313,11 @@
opt.nodelist = buf;
}
- if (opt.distribution == SLURM_DIST_ARBITRARY) {
- if (count != opt.ntasks) {
- error("You asked for %d tasks but specified %d nodes",
- opt.ntasks, count);
- goto error;
- }
+ if ((opt.distribution == SLURM_DIST_ARBITRARY) &&
+ (count != opt.ntasks)) {
+ error("You asked for %d tasks but specified %d nodes",
+ opt.ntasks, count);
+ goto error;
}
if (ai->nnodes == 0) {
@@ -342,8 +348,9 @@
job_create_allocation(resource_allocation_response_msg_t *resp)
{
srun_job_t *job;
- allocation_info_t *i = xmalloc(sizeof(*i));
+ allocation_info_t *i = xmalloc(sizeof(allocation_info_t));
+ i->alias_list = resp->alias_list;
i->nodelist = _normalize_hostlist(resp->node_list);
i->nnodes = resp->node_cnt;
i->jobid = resp->job_id;
@@ -424,7 +431,6 @@
#if defined HAVE_BGQ
//#if defined HAVE_BGQ && HAVE_BG_FILES
/* always return the ntasks here for Q */
- info("returning %d", opt.ntasks);
return opt.ntasks;
#endif
if (opt.cpus_set) {
@@ -461,13 +467,55 @@
pthread_cond_init(&job->state_cond, NULL);
job->state = SRUN_JOB_INIT;
+ job->alias_list = xstrdup(ainfo->alias_list);
job->nodelist = xstrdup(ainfo->nodelist);
job->stepid = ainfo->stepid;
-#if defined HAVE_BGQ
+#if defined HAVE_BG && !defined HAVE_BG_L_P
//#if defined HAVE_BGQ && defined HAVE_BG_FILES
- job->nhosts = ainfo->nnodes;
- select_g_alter_node_cnt(SELECT_APPLY_NODE_MAX_OFFSET, &job->nhosts);
+ /* Since the allocation will have the correct cnode count get
+ it if it is available. Else grab it from opt.min_nodes
+ (meaning the allocation happened before).
+ */
+ if (ainfo->select_jobinfo)
+ select_g_select_jobinfo_get(ainfo->select_jobinfo,
+ SELECT_JOBDATA_NODE_CNT,
+ &job->nhosts);
+ else
+ job->nhosts = opt.min_nodes;
+ /* If we didn't ask for nodes set it up correctly here so the
+ step allocation does the correct thing.
+ */
+ if (!opt.nodes_set) {
+ opt.min_nodes = opt.max_nodes = job->nhosts;
+ opt.nodes_set = true;
+ opt.ntasks_per_node = NO_VAL;
+ bg_figure_nodes_tasks();
+
+#if defined HAVE_BG_FILES
+ /* Replace the runjob line with correct information. */
+ int i, matches = 0;
+ for (i = 0; i < opt.argc; i++) {
+ if (!strcmp(opt.argv[i], "-p")) {
+ i++;
+ xfree(opt.argv[i]);
+ opt.argv[i] = xstrdup_printf(
+ "%d", opt.ntasks_per_node);
+ matches++;
+ } else if (!strcmp(opt.argv[i], "--np")) {
+ i++;
+ xfree(opt.argv[i]);
+ opt.argv[i] = xstrdup_printf(
+ "%d", opt.ntasks);
+ matches++;
+ }
+ if (matches == 2)
+ break;
+ }
+ xassert(matches == 2);
+#endif
+ }
+
#elif defined HAVE_FRONT_END /* Limited job step support */
opt.overcommit = true;
job->nhosts = 1;
@@ -520,12 +568,14 @@
static char *
_normalize_hostlist(const char *hostlist)
{
- char *buf = NULL;
+ char *buf = NULL;
hostlist_t hl = hostlist_create(hostlist);
- if (hl)
+ if (hl) {
buf = hostlist_ranged_string_xmalloc(hl);
- if (!hl || !buf)
+ hostlist_destroy(hl);
+ }
+ if (!buf)
return xstrdup(hostlist);
return buf;
diff --git a/src/srun/srun_job.h b/src/srun/srun_job.h
index 675da90..974d2e4 100644
--- a/src/srun/srun_job.h
+++ b/src/srun/srun_job.h
@@ -99,6 +99,7 @@
int rc; /* srun return code */
+	char *alias_list;	/* node name/address/hostname aliases */
char *nodelist; /* nodelist in string form */
fname_t *ifname;
diff --git a/src/srun/task_state.c b/src/srun/task_state.c
index bede86a..e3f7fca 100644
--- a/src/srun/task_state.c
+++ b/src/srun/task_state.c
@@ -173,8 +173,12 @@
void task_state_print (task_state_t ts, log_f fn)
{
- bitstr_t *unseen = bit_alloc (ts->n_tasks);
+ bitstr_t *unseen;
+ if (!ts) /* Not built yet */
+ return;
+
+ unseen = bit_alloc (ts->n_tasks);
if (bit_set_count (ts->start_failed)) {
_do_log_msg (ts->start_failed, fn, "failed to start");
bit_or (unseen, ts->start_failed);
diff --git a/src/srun_cr/Makefile.in b/src/srun_cr/Makefile.in
index aba8ab0..1cd1d38 100644
--- a/src/srun_cr/Makefile.in
+++ b/src/srun_cr/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -191,6 +191,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -227,6 +228,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -398,7 +400,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-srun_cr$(EXEEXT): $(srun_cr_OBJECTS) $(srun_cr_DEPENDENCIES)
+srun_cr$(EXEEXT): $(srun_cr_OBJECTS) $(srun_cr_DEPENDENCIES) $(EXTRA_srun_cr_DEPENDENCIES)
@rm -f srun_cr$(EXEEXT)
$(srun_cr_LINK) $(srun_cr_OBJECTS) $(srun_cr_LDADD) $(LIBS)
@@ -536,10 +538,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/sshare/Makefile.in b/src/sshare/Makefile.in
index 134a090..bd6668a 100644
--- a/src/sshare/Makefile.in
+++ b/src/sshare/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -190,6 +190,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -226,6 +227,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -402,7 +404,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sshare$(EXEEXT): $(sshare_OBJECTS) $(sshare_DEPENDENCIES)
+sshare$(EXEEXT): $(sshare_OBJECTS) $(sshare_DEPENDENCIES) $(EXTRA_sshare_DEPENDENCIES)
@rm -f sshare$(EXEEXT)
$(sshare_LINK) $(sshare_OBJECTS) $(sshare_LDADD) $(LIBS)
@@ -541,10 +543,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/sshare/process.c b/src/sshare/process.c
index 1172ec5..760653d 100644
--- a/src/sshare/process.c
+++ b/src/sshare/process.c
@@ -62,6 +62,7 @@
enum {
PRINT_ACCOUNT,
PRINT_CLUSTER,
+ PRINT_CPUMINS,
PRINT_EUSED,
PRINT_FSFACTOR,
PRINT_ID,
@@ -69,6 +70,7 @@
PRINT_NORMU,
PRINT_RAWS,
PRINT_RAWU,
+ PRINT_RUNMINS,
PRINT_USER,
};
@@ -82,7 +84,7 @@
slurm_addto_char_list(format_list,
"A,User,RawShares,NormShares,"
"RawUsage,NormUsage,EffUsage,"
- "FSFctr");
+ "FSFctr,GrpCPUMins,CPURunMins");
} else {
slurm_addto_char_list(format_list,
"A,User,RawShares,NormShares,"
@@ -103,7 +105,7 @@
field->name = xstrdup("Account");
field->len = -20;
field->print_routine = print_fields_str;
- } else if (!strncasecmp("Cluster", object, 1)) {
+ } else if (!strncasecmp("Cluster", object, 2)) {
field->type = PRINT_CLUSTER;
field->name = xstrdup("Cluster");
field->len = 10;
@@ -148,6 +150,16 @@
field->name = xstrdup("User");
field->len = 10;
field->print_routine = print_fields_str;
+ } else if (!strncasecmp("GrpCPUMins", object, 1)) {
+ field->type = PRINT_CPUMINS;
+ field->name = xstrdup("GrpCPUMins");
+ field->len = 11;
+ field->print_routine = print_fields_uint64;
+ } else if (!strncasecmp("CPURunMins", object, 2)) {
+ field->type = PRINT_RUNMINS;
+ field->name = xstrdup("CPURunMins");
+ field->len = 15;
+ field->print_routine = print_fields_uint64;
} else {
exit_code=1;
fprintf(stderr, "Unknown field '%s'\n", object);
@@ -268,6 +280,16 @@
tmp_char,
(curr_inx == field_count));
break;
+ case PRINT_CPUMINS:
+ field->print_routine(field,
+ share->grp_cpu_mins,
+ (curr_inx == field_count));
+ break;
+ case PRINT_RUNMINS:
+ field->print_routine(field,
+ share->cpu_run_mins,
+ (curr_inx == field_count));
+ break;
default:
field->print_routine(
field, NULL,
diff --git a/src/sstat/Makefile.am b/src/sstat/Makefile.am
index 95532b3..b01ba0b 100644
--- a/src/sstat/Makefile.am
+++ b/src/sstat/Makefile.am
@@ -8,9 +8,12 @@
sstat_LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
-
-noinst_HEADERS = sstat.c
-sstat_SOURCES = sstat.c process.c print.c options.c
+sstat_SOURCES = \
+ options.c \
+ print.c \
+ process.c \
+ sstat.c \
+ sstat.h
force:
$(sstat_LDADD) : force
diff --git a/src/sstat/Makefile.in b/src/sstat/Makefile.in
index 97b1cd5..a4fbce3 100644
--- a/src/sstat/Makefile.in
+++ b/src/sstat/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -17,7 +17,6 @@
# Makefile for sstat
-
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
@@ -40,8 +39,7 @@
target_triplet = @target@
bin_PROGRAMS = sstat$(EXEEXT)
subdir = src/sstat
-DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
$(top_srcdir)/auxdir/libtool.m4 \
@@ -89,8 +87,8 @@
CONFIG_CLEAN_VPATH_FILES =
am__installdirs = "$(DESTDIR)$(bindir)"
PROGRAMS = $(bin_PROGRAMS)
-am_sstat_OBJECTS = sstat.$(OBJEXT) process.$(OBJEXT) print.$(OBJEXT) \
- options.$(OBJEXT)
+am_sstat_OBJECTS = options.$(OBJEXT) print.$(OBJEXT) process.$(OBJEXT) \
+ sstat.$(OBJEXT)
sstat_OBJECTS = $(am_sstat_OBJECTS)
am__DEPENDENCIES_1 =
sstat_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o \
@@ -113,7 +111,6 @@
$(LDFLAGS) -o $@
SOURCES = $(sstat_SOURCES)
DIST_SOURCES = $(sstat_SOURCES)
-HEADERS = $(noinst_HEADERS)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -194,6 +191,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -230,6 +228,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -320,8 +319,13 @@
AUTOMAKE_OPTIONS = foreign
INCLUDES = -I$(top_srcdir)
sstat_LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
-noinst_HEADERS = sstat.c
-sstat_SOURCES = sstat.c process.c print.c options.c
+sstat_SOURCES = \
+ options.c \
+ print.c \
+ process.c \
+ sstat.c \
+ sstat.h
+
sstat_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)
all: all-am
@@ -400,7 +404,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sstat$(EXEEXT): $(sstat_OBJECTS) $(sstat_DEPENDENCIES)
+sstat$(EXEEXT): $(sstat_OBJECTS) $(sstat_DEPENDENCIES) $(EXTRA_sstat_DEPENDENCIES)
@rm -f sstat$(EXEEXT)
$(sstat_LINK) $(sstat_OBJECTS) $(sstat_LDADD) $(LIBS)
@@ -526,7 +530,7 @@
done
check-am: all-am
check: check-am
-all-am: Makefile $(PROGRAMS) $(HEADERS)
+all-am: Makefile $(PROGRAMS)
installdirs:
for dir in "$(DESTDIR)$(bindir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
@@ -541,10 +545,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/sstat/sstat.c b/src/sstat/sstat.c
index 16b08d6..75c589d 100644
--- a/src/sstat/sstat.c
+++ b/src/sstat/sstat.c
@@ -58,18 +58,18 @@
{-12, "JobID", print_fields_str, PRINT_JOBID},
{8, "MaxPages", print_fields_str, PRINT_MAXPAGES},
{12, "MaxPagesNode", print_fields_str, PRINT_MAXPAGESNODE},
- {14, "MaxPagesTask", print_fields_int, PRINT_MAXPAGESTASK},
+ {14, "MaxPagesTask", print_fields_uint, PRINT_MAXPAGESTASK},
{10, "MaxRSS", print_fields_str, PRINT_MAXRSS},
{10, "MaxRSSNode", print_fields_str, PRINT_MAXRSSNODE},
- {10, "MaxRSSTask", print_fields_int, PRINT_MAXRSSTASK},
+ {10, "MaxRSSTask", print_fields_uint, PRINT_MAXRSSTASK},
{10, "MaxVMSize", print_fields_str, PRINT_MAXVSIZE},
{14, "MaxVMSizeNode", print_fields_str, PRINT_MAXVSIZENODE},
- {14, "MaxVMSizeTask", print_fields_int, PRINT_MAXVSIZETASK},
+ {14, "MaxVMSizeTask", print_fields_uint, PRINT_MAXVSIZETASK},
{10, "MinCPU", print_fields_str, PRINT_MINCPU},
{10, "MinCPUNode", print_fields_str, PRINT_MINCPUNODE},
- {10, "MinCPUTask", print_fields_int, PRINT_MINCPUTASK},
+ {10, "MinCPUTask", print_fields_uint, PRINT_MINCPUTASK},
{20, "Nodelist", print_fields_str, PRINT_NODELIST},
- {8, "NTasks", print_fields_int, PRINT_NTASKS},
+ {8, "NTasks", print_fields_uint, PRINT_NTASKS},
{20, "Pids", print_fields_str, PRINT_PIDS},
{0, NULL, NULL, 0}};
diff --git a/src/strigger/Makefile.in b/src/strigger/Makefile.in
index fed5a4b..a150ad4 100644
--- a/src/strigger/Makefile.in
+++ b/src/strigger/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -194,6 +194,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -230,6 +231,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -400,7 +402,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-strigger$(EXEEXT): $(strigger_OBJECTS) $(strigger_DEPENDENCIES)
+strigger$(EXEEXT): $(strigger_OBJECTS) $(strigger_DEPENDENCIES) $(EXTRA_strigger_DEPENDENCIES)
@rm -f strigger$(EXEEXT)
$(strigger_LINK) $(strigger_OBJECTS) $(strigger_LDADD) $(LIBS)
@@ -539,10 +541,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/strigger/opts.c b/src/strigger/opts.c
index fb105e6..4d00c16 100644
--- a/src/strigger/opts.c
+++ b/src/strigger/opts.c
@@ -73,6 +73,7 @@
#define OPT_LONG_USER 0x105
#define OPT_LONG_BLOCK_ERR 0x106
#define OPT_LONG_FRONT_END 0x107
+#define OPT_LONG_FLAGS 0x108
/* getopt_long options, integers but not characters */
@@ -127,6 +128,7 @@
{"version", no_argument, 0, 'V'},
{"block_err", no_argument, 0, OPT_LONG_BLOCK_ERR},
{"clear", no_argument, 0, OPT_LONG_CLEAR},
+ {"flags", required_argument, 0, OPT_LONG_FLAGS},
{"front_end", no_argument, 0, OPT_LONG_FRONT_END},
{"get", no_argument, 0, OPT_LONG_GET},
{"help", no_argument, 0, OPT_LONG_HELP},
@@ -268,6 +270,14 @@
case (int) OPT_LONG_CLEAR:
params.mode_clear = true;
break;
+ case (int) OPT_LONG_FLAGS:
+ if (!strncasecmp(optarg, "perm", 4))
+ params.flags = TRIGGER_FLAG_PERM;
+ else {
+ error("Invalid flags %s", optarg);
+ exit(1);
+ }
+ break;
case (int) OPT_LONG_FRONT_END:
params.front_end = true;
break;
@@ -307,6 +317,7 @@
params.bu_ctld_fail = false;
params.bu_ctld_res_op = false;
params.bu_ctld_as_ctrl = false;
+ params.flags = 0;
params.front_end = false;
params.node_down = false;
params.node_drained = false;
@@ -326,7 +337,7 @@
params.reconfig = false;
params.time_limit = false;
params.node_up = false;
- params.user_id = 0;
+ params.user_id = NO_VAL;
params.verbose = 0;
}
@@ -338,6 +349,7 @@
verbose("get = %s", params.mode_get ? "true" : "false");
verbose("clear = %s", params.mode_clear ? "true" : "false");
verbose("block_err = %s", params.block_err ? "true" : "false");
+ verbose("flags = %u", params.flags);
verbose("front_end = %s", params.front_end ? "true" : "false");
verbose("job_id = %u", params.job_id);
verbose("job_fini = %s", params.job_fini ? "true" : "false");
@@ -353,7 +365,10 @@
verbose("reconfig = %s", params.reconfig ? "true" : "false");
verbose("time_limit = %s", params.time_limit ? "true" : "false");
verbose("trigger_id = %u", params.trigger_id);
- verbose("user_id = %u", params.user_id);
+ if (params.user_id == NO_VAL)
+ verbose("user_id = N/A");
+ else
+ verbose("user_id = %u", params.user_id);
verbose("verbose = %d", params.verbose);
verbose("primary_slurmctld_failure = %s",
params.pri_ctld_fail ? "true" : "false");
@@ -388,8 +403,8 @@
exit(1);
}
- if (params.mode_clear
- && ((params.trigger_id + params.job_id + params.user_id) == 0)) {
+ if (params.mode_clear && (params.user_id == NO_VAL) &&
+ (params.trigger_id == 0) && (params.job_id == 0)) {
error("You must specify a --id, --jobid, or --user to clear");
exit(1);
}
@@ -489,6 +504,7 @@
trigger event when primary slurmctld acct buffer full\n\
-F, --fail trigger event when node is expected to FAIL\n\
-f, --fini trigger event when job finishes\n\
+ --flags=perm trigger event flag (perm = permanent)\n\n\
-g, --primary_slurmdbd_failure\n\
trigger when primary slurmdbd fails\n\
-G, --primary_slurmdbd_resumed_operation\n\
diff --git a/src/strigger/strigger.c b/src/strigger/strigger.c
index 4c0b876..0203fa3 100644
--- a/src/strigger/strigger.c
+++ b/src/strigger/strigger.c
@@ -63,6 +63,7 @@
static int _clear_trigger(void);
static int _get_trigger(void);
static int _set_trigger(void);
+static char *_trig_flags(uint16_t flags);
static int _trig_offset(uint16_t offset);
static char *_trig_user(uint32_t user_id);
@@ -115,7 +116,7 @@
if (params.job_id)
verbose("triggers for job %s cleared", ti.res_id);
- else if (params.user_id)
+ else if (params.user_id != NO_VAL)
verbose("triggers for user %u cleared", ti.user_id);
else
verbose("trigger %u cleared", ti.trig_id);
@@ -204,7 +205,8 @@
ti.res_type = TRIGGER_RES_TYPE_DATABASE;
}
- ti.offset = params.offset + 0x8000;
+ ti.flags = params.flags;
+ ti.offset = params.offset + 0x8000;
ti.program = params.program;
while (slurm_set_trigger(&ti)) {
@@ -307,7 +309,7 @@
trig_msg->trigger_array[i].trig_id)
continue;
}
- if (params.user_id) {
+ if (params.user_id != NO_VAL) {
if (params.user_id !=
trig_msg->trigger_array[i].user_id)
continue;
@@ -397,18 +399,19 @@
/* 35353535353535353535353535353535353 */
"TYPE "
- /* 666666 88888888 xxxxxxx */
- "OFFSET USER PROGRAM\n");
+ /* 666666 88888888 55555 xxxxxxx */
+ "OFFSET USER FLAGS PROGRAM\n");
}
line_no++;
- printf("%7u %-9s %7s %-35s %6d %-8s %s\n",
+ printf("%7u %-9s %7s %-35s %6d %-8s %-5s %s\n",
trig_msg->trigger_array[i].trig_id,
trigger_res_type(trig_msg->trigger_array[i].res_type),
trig_msg->trigger_array[i].res_id,
trigger_type(trig_msg->trigger_array[i].trig_type),
_trig_offset(trig_msg->trigger_array[i].offset),
_trig_user(trig_msg->trigger_array[i].user_id),
+ _trig_flags(trig_msg->trigger_array[i].flags),
trig_msg->trigger_array[i].program);
}
@@ -416,6 +419,13 @@
return 0;
}
+static char *_trig_flags(uint16_t flags)
+{
+ if (flags & TRIGGER_FLAG_PERM)
+ return "PERM";
+ return "";
+}
+
static int _trig_offset(uint16_t offset)
{
static int rc;
diff --git a/src/strigger/strigger.h b/src/strigger/strigger.h
index 0c31f17..004a9a6 100644
--- a/src/strigger/strigger.h
+++ b/src/strigger/strigger.h
@@ -52,6 +52,7 @@
struct strigger_parameters {
bool block_err;
List clusters;
+ uint16_t flags;
bool front_end;
bool job_fini;
uint32_t job_id;
diff --git a/src/sview/Makefile.am b/src/sview/Makefile.am
index f7d8627..2896382 100644
--- a/src/sview/Makefile.am
+++ b/src/sview/Makefile.am
@@ -5,7 +5,7 @@
AUTOMAKE_OPTIONS = foreign
INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
-if HAVE_GTK
+if BUILD_SVIEW
bin_PROGRAMS = sview
diff --git a/src/sview/Makefile.in b/src/sview/Makefile.in
index 3dea976..c9c37e2 100644
--- a/src/sview/Makefile.in
+++ b/src/sview/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -40,7 +40,7 @@
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
-@HAVE_GTK_TRUE@bin_PROGRAMS = sview$(EXEEXT)
+@BUILD_SVIEW_TRUE@bin_PROGRAMS = sview$(EXEEXT)
subdir = src/sview
DIST_COMMON = $(am__noinst_HEADERS_DIST) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in
@@ -95,28 +95,28 @@
job_info.c block_info.c front_end_info.c node_info.c \
resv_info.c submit_info.c admin_info.c common.c config_info.c \
defaults.c
-@HAVE_GTK_TRUE@am_sview_OBJECTS = sview-sview.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-popups.$(OBJEXT) sview-grid.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-part_info.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-job_info.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-block_info.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-front_end_info.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-node_info.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-resv_info.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-submit_info.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-admin_info.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-common.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-config_info.$(OBJEXT) \
-@HAVE_GTK_TRUE@ sview-defaults.$(OBJEXT)
+@BUILD_SVIEW_TRUE@am_sview_OBJECTS = sview-sview.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-popups.$(OBJEXT) sview-grid.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-part_info.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-job_info.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-block_info.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-front_end_info.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-node_info.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-resv_info.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-submit_info.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-admin_info.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-common.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-config_info.$(OBJEXT) \
+@BUILD_SVIEW_TRUE@ sview-defaults.$(OBJEXT)
am__EXTRA_sview_SOURCES_DIST = sview.h sview.c popups.c grid.c \
part_info.c job_info.c block_info.c front_end_info.c \
node_info.c resv_info.c submit_info.c admin_info.c common.c \
config_info.c defaults.c
sview_OBJECTS = $(am_sview_OBJECTS)
am__DEPENDENCIES_1 =
-@HAVE_GTK_TRUE@sview_DEPENDENCIES = \
-@HAVE_GTK_TRUE@ $(top_builddir)/src/db_api/libslurmdb.o \
-@HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1)
+@BUILD_SVIEW_TRUE@sview_DEPENDENCIES = \
+@BUILD_SVIEW_TRUE@ $(top_builddir)/src/db_api/libslurmdb.o \
+@BUILD_SVIEW_TRUE@ $(am__DEPENDENCIES_1)
sview_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(sview_CFLAGS) $(CFLAGS) $(sview_LDFLAGS) \
$(LDFLAGS) -o $@
@@ -218,6 +218,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -254,6 +255,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -343,18 +345,18 @@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = foreign
INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
-@HAVE_GTK_TRUE@sview_LDADD = $(top_builddir)/src/db_api/libslurmdb.o $(DL_LIBS)
-@HAVE_GTK_TRUE@noinst_HEADERS = sview.h
-@HAVE_GTK_TRUE@sview_SOURCES = sview.c popups.c grid.c part_info.c job_info.c \
-@HAVE_GTK_TRUE@ block_info.c front_end_info.c node_info.c resv_info.c \
-@HAVE_GTK_TRUE@ submit_info.c admin_info.c common.c \
-@HAVE_GTK_TRUE@ config_info.c defaults.c
+@BUILD_SVIEW_TRUE@sview_LDADD = $(top_builddir)/src/db_api/libslurmdb.o $(DL_LIBS)
+@BUILD_SVIEW_TRUE@noinst_HEADERS = sview.h
+@BUILD_SVIEW_TRUE@sview_SOURCES = sview.c popups.c grid.c part_info.c job_info.c \
+@BUILD_SVIEW_TRUE@ block_info.c front_end_info.c node_info.c resv_info.c \
+@BUILD_SVIEW_TRUE@ submit_info.c admin_info.c common.c \
+@BUILD_SVIEW_TRUE@ config_info.c defaults.c
-@HAVE_GTK_TRUE@sview_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) $(BG_LDFLAGS) $(GTK_LIBS)
-@HAVE_GTK_TRUE@sview_CFLAGS = $(GTK_CFLAGS)
-@HAVE_GTK_FALSE@EXTRA_sview_SOURCES = sview.h sview.c popups.c grid.c part_info.c job_info.c \
-@HAVE_GTK_FALSE@ block_info.c front_end_info.c node_info.c resv_info.c \
-@HAVE_GTK_FALSE@ submit_info.c admin_info.c common.c config_info.c defaults.c
+@BUILD_SVIEW_TRUE@sview_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) $(BG_LDFLAGS) $(GTK_LIBS)
+@BUILD_SVIEW_TRUE@sview_CFLAGS = $(GTK_CFLAGS)
+@BUILD_SVIEW_FALSE@EXTRA_sview_SOURCES = sview.h sview.c popups.c grid.c part_info.c job_info.c \
+@BUILD_SVIEW_FALSE@ block_info.c front_end_info.c node_info.c resv_info.c \
+@BUILD_SVIEW_FALSE@ submit_info.c admin_info.c common.c config_info.c defaults.c
all: all-am
@@ -433,7 +435,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-sview$(EXEEXT): $(sview_OBJECTS) $(sview_DEPENDENCIES)
+sview$(EXEEXT): $(sview_OBJECTS) $(sview_DEPENDENCIES) $(EXTRA_sview_DEPENDENCIES)
@rm -f sview$(EXEEXT)
$(sview_LINK) $(sview_OBJECTS) $(sview_LDADD) $(LIBS)
@@ -780,10 +782,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
@@ -882,9 +889,9 @@
uninstall-binPROGRAMS
-@HAVE_GTK_TRUE@force:
-@HAVE_GTK_TRUE@$(sview_LDADD) : force
-@HAVE_GTK_TRUE@ @cd `dirname $@` && $(MAKE) `basename $@`
+@BUILD_SVIEW_TRUE@force:
+@BUILD_SVIEW_TRUE@$(sview_LDADD) : force
+@BUILD_SVIEW_TRUE@ @cd `dirname $@` && $(MAKE) `basename $@`
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/src/sview/admin_info.c b/src/sview/admin_info.c
index 02b47de..55149e2 100644
--- a/src/sview/admin_info.c
+++ b/src/sview/admin_info.c
@@ -49,7 +49,7 @@
{G_TYPE_STRING, SORTID_TIMELIMIT, "TIMELIMIT", TRUE, -1},
{G_TYPE_STRING, SORTID_NODES, "NODES", TRUE, -1},
#ifdef HAVE_BG
- {G_TYPE_STRING, SORTID_NODELIST, "BP_LIST", TRUE, -1},
+ {G_TYPE_STRING, SORTID_NODELIST, "MIDPLANELIST", TRUE, -1},
#else
{G_TYPE_STRING, SORTID_NODELIST, "NODELIST", TRUE, -1},
#endif
diff --git a/src/sview/block_info.c b/src/sview/block_info.c
index d7e3d34..693fd1d 100644
--- a/src/sview/block_info.c
+++ b/src/sview/block_info.c
@@ -34,7 +34,6 @@
#define _DEBUG 0
typedef struct {
- char *bg_user_name;
char *bg_block_name;
char *slurm_part_name;
char *mp_str;
@@ -43,12 +42,17 @@
uint16_t state;
int size;
int cnode_cnt;
- int *bp_inx; /* list index pairs into node_table for *mp_str:
+ int cnode_err_cnt;
+ GtkTreeIter iter_ptr;
+ bool iter_set;
+ int *mp_inx; /* list index pairs into node_table for *mp_str:
* start_range_1, end_range_1,
* start_range_2, .., -1 */
int color_inx;
- int job_running;
+ List job_list;
+ int pos;
bool printed;
+ char *reason;
bool small_block;
char *imageblrts; /* ImageBlrts for this block */
char *imagelinux; /* ImageLinux for this block */
@@ -59,7 +63,6 @@
enum {
SORTID_POS = POS_LOC,
SORTID_BLOCK,
- SORTID_NODELIST,
SORTID_COLOR,
SORTID_COLOR_INX,
SORTID_CONN,
@@ -74,8 +77,10 @@
SORTID_IMAGERAMDISK,
SORTID_IMAGEMLOADER,
#endif
- SORTID_MP_STR,
+ SORTID_NODELIST,
+ SORTID_NODE_CNT,
SORTID_PARTITION,
+ SORTID_REASON,
SORTID_STATE,
SORTID_UPDATED,
SORTID_USE,
@@ -88,7 +93,7 @@
/*these are the settings to apply for the user
* on the first startup after a fresh slurm install.*/
static char *_initial_page_opts = "Block_ID,State,JobID,User,Node_Count,"
- "Node_Use,BP_List,Partition";
+ "Node_Use,MidplaneList,Partition";
static display_data_t display_data_block[] = {
{G_TYPE_INT, SORTID_POS, NULL, FALSE, EDIT_NONE, refresh_block,
@@ -102,14 +107,19 @@
create_model_block, admin_edit_block},
{G_TYPE_STRING, SORTID_JOB, "JobID", FALSE, EDIT_NONE, refresh_block,
create_model_block, admin_edit_block},
+#ifdef HAVE_BG_L_P
{G_TYPE_STRING, SORTID_USER, "User", FALSE, EDIT_NONE, refresh_block,
create_model_block, admin_edit_block},
- {G_TYPE_STRING, SORTID_MP_STR, "Node Count",
+#else
+ {G_TYPE_STRING, SORTID_USER, NULL, FALSE, EDIT_NONE, refresh_block,
+ create_model_block, admin_edit_block},
+#endif
+ {G_TYPE_STRING, SORTID_NODE_CNT, "Node Count",
FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
{G_TYPE_STRING, SORTID_CONN, "Connection Type",
FALSE, EDIT_NONE, refresh_block,
create_model_block, admin_edit_block},
- {G_TYPE_STRING, SORTID_NODELIST, "BP List", FALSE,
+ {G_TYPE_STRING, SORTID_NODELIST, "MidplaneList", FALSE,
EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
{G_TYPE_STRING, SORTID_PARTITION, "Partition",
FALSE, EDIT_NONE, refresh_block,
@@ -123,7 +133,7 @@
FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
{G_TYPE_STRING, SORTID_IMAGERAMDISK, "Image Ramdisk",
FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
-#else
+#elif defined HAVE_BGP
{G_TYPE_STRING, SORTID_USE, NULL, FALSE, EDIT_NONE, refresh_block,
create_model_block, admin_edit_block},
{G_TYPE_STRING, SORTID_IMAGEBLRTS, NULL,
@@ -132,9 +142,20 @@
FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
{G_TYPE_STRING, SORTID_IMAGERAMDISK, "Image Ioload",
FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
+#elif defined HAVE_BGQ
+ {G_TYPE_STRING, SORTID_USE, NULL, FALSE, EDIT_NONE, refresh_block,
+ create_model_block, admin_edit_block},
+ {G_TYPE_STRING, SORTID_IMAGEBLRTS, NULL,
+ FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
+ {G_TYPE_STRING, SORTID_IMAGELINUX, NULL,
+ FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
+ {G_TYPE_STRING, SORTID_IMAGERAMDISK, NULL,
+ FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
#endif
{G_TYPE_STRING, SORTID_IMAGEMLOADER, "Image Mloader",
FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
+ {G_TYPE_STRING, SORTID_REASON, "Reason",
+ FALSE, EDIT_NONE, refresh_block, create_model_block, admin_edit_block},
{G_TYPE_POINTER, SORTID_NODE_INX, NULL, FALSE, EDIT_NONE,
refresh_block, create_model_resv, admin_edit_resv},
{G_TYPE_INT, SORTID_COLOR_INX, NULL, FALSE, EDIT_NONE,
@@ -161,7 +182,7 @@
TRUE, ADMIN_PAGE},
{G_TYPE_STRING, JOB_PAGE, "Jobs", TRUE, BLOCK_PAGE},
{G_TYPE_STRING, PART_PAGE, "Partitions", TRUE, BLOCK_PAGE},
- {G_TYPE_STRING, NODE_PAGE, "Base Partitions", TRUE, BLOCK_PAGE},
+ {G_TYPE_STRING, NODE_PAGE, "Midplanes", TRUE, BLOCK_PAGE},
//{G_TYPE_STRING, SUBMIT_PAGE, "Job Submit", FALSE, BLOCK_PAGE},
{G_TYPE_STRING, RESV_PAGE, "Reservations", TRUE, BLOCK_PAGE},
{G_TYPE_NONE, -1, NULL, FALSE, EDIT_NONE}
@@ -171,26 +192,57 @@
static void _admin_block(GtkTreeModel *model, GtkTreeIter *iter, char *type);
static void _append_block_record(sview_block_info_t *block_ptr,
- GtkTreeStore *treestore, GtkTreeIter *iter,
- int line);
+ GtkTreeStore *treestore);
static int _in_slurm_partition(int *part_inx, int *block_inx);
static void _process_each_block(GtkTreeModel *model, GtkTreePath *path,
GtkTreeIter*iter, gpointer userdata);
+static char *_set_running_job_str(List job_list, bool compact)
+{
+ int cnt = list_count(job_list);
+ block_job_info_t *block_job;
+
+ if (!cnt) {
+ return xstrdup("-");
+ } else if (cnt == 1) {
+ block_job = list_peek(job_list);
+ return xstrdup_printf("%u", block_job->job_id);
+ } else if (compact)
+ return xstrdup("multiple");
+ else {
+ char *tmp_char = NULL;
+ ListIterator itr = list_iterator_create(job_list);
+ while ((block_job = list_next(itr))) {
+ if (tmp_char)
+ xstrcat(tmp_char, " ");
+ xstrfmtcat(tmp_char, "%u", block_job->job_id);
+ }
+ return tmp_char;
+ }
+
+ return NULL;
+}
+
static void _block_list_del(void *object)
{
sview_block_info_t *block_ptr = (sview_block_info_t *)object;
if (block_ptr) {
- xfree(block_ptr->bg_user_name);
xfree(block_ptr->bg_block_name);
xfree(block_ptr->slurm_part_name);
xfree(block_ptr->mp_str);
+ xfree(block_ptr->reason);
xfree(block_ptr->imageblrts);
xfree(block_ptr->imagelinux);
xfree(block_ptr->imagemloader);
xfree(block_ptr->imageramdisk);
- /* don't xfree(block_ptr->bp_inx);
+
+ if (block_ptr->job_list) {
+ list_destroy(block_ptr->job_list);
+ block_ptr->job_list = NULL;
+ }
+
+ /* don't xfree(block_ptr->mp_inx);
it isn't copied like the chars and is freed in the api
*/
xfree(block_ptr);
@@ -198,17 +250,17 @@
}
}
-static int _in_slurm_partition(int *part_inx, int *bp_inx)
+static int _in_slurm_partition(int *part_inx, int *mp_inx)
{
int found = 0;
int i=0, j=0;
- while (bp_inx[i] >= 0) {
+ while (mp_inx[i] >= 0) {
j = 0;
found = 0;
while (part_inx[j] >= 0) {
- if ((bp_inx[i] >= part_inx[j])
- && bp_inx[i+1] <= part_inx[j+1]) {
+ if ((mp_inx[i] >= part_inx[j])
+ && mp_inx[i+1] <= part_inx[j+1]) {
found = 1;
break;
}
@@ -226,7 +278,7 @@
sview_block_info_t *block_ptr,
int update)
{
- char tmp_cnt[18];
+ char tmp_cnt[18], tmp_cnt2[18];
char *tmp_char = NULL;
GtkTreeIter iter;
GtkTreeStore *treestore =
@@ -243,7 +295,25 @@
tmp_char);
xfree(tmp_char);
- if (cluster_flags & CLUSTER_FLAG_BGL) {
+ if (cluster_flags & CLUSTER_FLAG_BGQ) {
+ add_display_treestore_line(update, treestore, &iter,
+ find_col_name(display_data_block,
+ SORTID_IMAGEMLOADER),
+ block_ptr->imagemloader);
+ } else if (cluster_flags & CLUSTER_FLAG_BGP) {
+ add_display_treestore_line(update, treestore, &iter,
+ find_col_name(display_data_block,
+ SORTID_IMAGELINUX),
+ block_ptr->imagelinux);
+ add_display_treestore_line(update, treestore, &iter,
+ find_col_name(display_data_block,
+ SORTID_IMAGERAMDISK),
+ block_ptr->imageramdisk);
+ add_display_treestore_line(update, treestore, &iter,
+ find_col_name(display_data_block,
+ SORTID_IMAGEMLOADER),
+ block_ptr->imagemloader);
+ } else if (cluster_flags & CLUSTER_FLAG_BGL) {
add_display_treestore_line(update, treestore, &iter,
find_col_name(display_data_block,
SORTID_IMAGEBLRTS),
@@ -260,31 +330,15 @@
find_col_name(display_data_block,
SORTID_IMAGERAMDISK),
block_ptr->imageramdisk);
- } else {
- add_display_treestore_line(update, treestore, &iter,
- find_col_name(display_data_block,
- SORTID_IMAGELINUX),
- block_ptr->imagelinux);
- add_display_treestore_line(update, treestore, &iter,
- find_col_name(display_data_block,
- SORTID_IMAGERAMDISK),
- block_ptr->imageramdisk);
- add_display_treestore_line(update, treestore, &iter,
- find_col_name(display_data_block,
- SORTID_IMAGEMLOADER),
- block_ptr->imagemloader);
}
- if (block_ptr->job_running > NO_JOB_RUNNING)
- snprintf(tmp_cnt, sizeof(tmp_cnt),
- "%d", block_ptr->job_running);
- else
- snprintf(tmp_cnt, sizeof(tmp_cnt), "-");
+ tmp_char = _set_running_job_str(block_ptr->job_list, 0);
add_display_treestore_line(update, treestore, &iter,
find_col_name(display_data_block,
SORTID_JOB),
- tmp_cnt);
+ tmp_char);
+ xfree(tmp_char);
if (cluster_flags & CLUSTER_FLAG_BGL) {
add_display_treestore_line(update, treestore, &iter,
find_col_name(display_data_block,
@@ -294,11 +348,18 @@
}
convert_num_unit((float)block_ptr->cnode_cnt, tmp_cnt, sizeof(tmp_cnt),
UNIT_NONE);
+ if (cluster_flags & CLUSTER_FLAG_BGQ) {
+ convert_num_unit((float)block_ptr->cnode_err_cnt, tmp_cnt2,
+ sizeof(tmp_cnt2), UNIT_NONE);
+ tmp_char = xstrdup_printf("%s/%s", tmp_cnt, tmp_cnt2);
+ } else
+ tmp_char = tmp_cnt;
add_display_treestore_line(update, treestore, &iter,
find_col_name(display_data_block,
- SORTID_MP_STR),
- tmp_cnt);
-
+ SORTID_NODE_CNT),
+ tmp_char);
+ if (cluster_flags & CLUSTER_FLAG_BGQ)
+ xfree(tmp_char);
add_display_treestore_line(update, treestore, &iter,
find_col_name(display_data_block,
SORTID_PARTITION),
@@ -309,52 +370,60 @@
bg_block_state_string(block_ptr->state));
add_display_treestore_line(update, treestore, &iter,
find_col_name(display_data_block,
- SORTID_USER),
- block_ptr->bg_user_name);
+ SORTID_REASON),
+ block_ptr->reason);
}
static void _update_block_record(sview_block_info_t *block_ptr,
- GtkTreeStore *treestore, GtkTreeIter *iter)
+ GtkTreeStore *treestore)
{
- char job_running[20], cnode_cnt[20];
- char *tmp_char = NULL;
+ char cnode_cnt[20], cnode_cnt2[20];
+ char *tmp_char = NULL, *tmp_char2 = NULL, *tmp_char3 = NULL;
- if (block_ptr->job_running > NO_JOB_RUNNING)
- snprintf(job_running, sizeof(job_running),
- "%d", block_ptr->job_running);
- else
- snprintf(job_running, sizeof(job_running), "-");
-
- convert_num_unit((float)block_ptr->cnode_cnt, cnode_cnt, sizeof(cnode_cnt),
- UNIT_NONE);
+ convert_num_unit((float)block_ptr->cnode_cnt, cnode_cnt,
+ sizeof(cnode_cnt), UNIT_NONE);
+ if (cluster_flags & CLUSTER_FLAG_BGQ) {
+ convert_num_unit((float)block_ptr->cnode_err_cnt, cnode_cnt2,
+ sizeof(cnode_cnt), UNIT_NONE);
+ tmp_char3 = xstrdup_printf("%s/%s", cnode_cnt, cnode_cnt2);
+ } else
+ tmp_char3 = cnode_cnt;
tmp_char = conn_type_string_full(block_ptr->bg_conn_type);
-
+ tmp_char2 = _set_running_job_str(block_ptr->job_list, 0);
/* Combining these records provides a slight performance improvement */
- gtk_tree_store_set(treestore, iter,
+ gtk_tree_store_set(treestore, &block_ptr->iter_ptr,
SORTID_BLOCK, block_ptr->bg_block_name,
SORTID_COLOR,
sview_colors[block_ptr->color_inx],
SORTID_COLOR_INX, block_ptr->color_inx,
SORTID_CONN, tmp_char,
- SORTID_IMAGERAMDISK, block_ptr->imageramdisk,
- SORTID_IMAGELINUX, block_ptr->imagelinux,
SORTID_IMAGEMLOADER, block_ptr->imagemloader,
- SORTID_JOB, job_running,
- SORTID_NODE_INX, block_ptr->bp_inx,
- SORTID_MP_STR, cnode_cnt,
+ SORTID_JOB, tmp_char2,
+ SORTID_NODE_INX, block_ptr->mp_inx,
+ SORTID_NODE_CNT, tmp_char3,
SORTID_NODELIST, block_ptr->mp_str,
SORTID_PARTITION, block_ptr->slurm_part_name,
+ SORTID_REASON, block_ptr->reason,
SORTID_SMALL_BLOCK, block_ptr->small_block,
SORTID_STATE,
bg_block_state_string(block_ptr->state),
- SORTID_USER, block_ptr->bg_user_name,
SORTID_UPDATED, 1,
-1);
xfree(tmp_char);
+ xfree(tmp_char2);
+ if (cluster_flags & CLUSTER_FLAG_BGQ)
+ xfree(tmp_char3);
- if (cluster_flags & CLUSTER_FLAG_BGL) {
- gtk_tree_store_set(treestore, iter,
+ if (cluster_flags & CLUSTER_FLAG_BGP) {
+ gtk_tree_store_set(treestore, &block_ptr->iter_ptr,
+ SORTID_IMAGERAMDISK, block_ptr->imageramdisk,
+ SORTID_IMAGELINUX, block_ptr->imagelinux,
+ -1);
+ } else if (cluster_flags & CLUSTER_FLAG_BGL) {
+ gtk_tree_store_set(treestore, &block_ptr->iter_ptr,
+ SORTID_IMAGERAMDISK, block_ptr->imageramdisk,
+ SORTID_IMAGELINUX, block_ptr->imagelinux,
SORTID_IMAGEBLRTS, block_ptr->imageblrts,
SORTID_USE,
node_use_string(block_ptr->bg_node_use),
@@ -365,12 +434,12 @@
}
static void _append_block_record(sview_block_info_t *block_ptr,
- GtkTreeStore *treestore, GtkTreeIter *iter,
- int line)
+ GtkTreeStore *treestore)
{
- gtk_tree_store_append(treestore, iter, NULL);
- gtk_tree_store_set(treestore, iter, SORTID_POS, line, -1);
- _update_block_record(block_ptr, treestore, iter);
+ gtk_tree_store_append(treestore, &block_ptr->iter_ptr, NULL);
+ gtk_tree_store_set(treestore, &block_ptr->iter_ptr, SORTID_POS,
+ block_ptr->pos, -1);
+ _update_block_record(block_ptr, treestore);
}
static void _update_info_block(List block_list,
@@ -378,30 +447,16 @@
{
ListIterator itr;
sview_block_info_t *block_ptr = NULL;
- GtkTreePath *path = gtk_tree_path_new_first();
GtkTreeModel *model = gtk_tree_view_get_model(tree_view);
- GtkTreeIter iter;
+ static GtkTreeModel *last_model = NULL;
char *name = NULL;
- char *host = NULL;
- int line = 0;
if (!block_list) {
g_print("No block_list given");
return;
}
- /* get the iter, or find out the list is empty goto add */
- if (gtk_tree_model_get_iter(model, &iter, path)) {
- /* make sure all the partitions are still here */
- while (1) {
- gtk_tree_store_set(GTK_TREE_STORE(model), &iter,
- SORTID_UPDATED, 0, -1);
- if (!gtk_tree_model_iter_next(model, &iter)) {
- break;
- }
- }
- }
-
+ set_for_update(model, SORTID_UPDATED);
/* Report the BG Blocks */
@@ -412,45 +467,69 @@
if (!block_ptr->slurm_part_name)
block_ptr->slurm_part_name = xstrdup("no part");
- /* get the iter, or find out the list is empty goto add */
- if (!gtk_tree_model_get_iter(model, &iter, path)) {
- goto adding;
- }
- line = 0;
- while (1) {
- /* search for the jobid and check to see if
- it is in the list */
- gtk_tree_model_get(model, &iter, SORTID_BLOCK,
- &name, -1);
- if (!strcmp(name, block_ptr->bg_block_name)) {
- /* update with new info */
- g_free(name);
- _update_block_record(block_ptr,
- GTK_TREE_STORE(model),
- &iter);
- goto found;
+ /* This means the tree_store changed (added new column
+ or something). */
+ if (last_model != model)
+ block_ptr->iter_set = false;
+
+ if (block_ptr->iter_set) {
+ gtk_tree_model_get(model, &block_ptr->iter_ptr,
+ SORTID_BLOCK, &name, -1);
+ if (strcmp(name, block_ptr->bg_block_name)) {
+ /* Bad pointer */
+ block_ptr->iter_set = false;
}
g_free(name);
-
- line++;
- if (!gtk_tree_model_iter_next(model, &iter)) {
- break;
- }
}
- adding:
- _append_block_record(block_ptr, GTK_TREE_STORE(model),
- &iter, line);
- found:
- ;
+ if (block_ptr->iter_set)
+ _update_block_record(block_ptr,
+ GTK_TREE_STORE(model));
+ else {
+ GtkTreePath *path = gtk_tree_path_new_first();
+
+ /* get the iter, or find out the list is empty
+ * goto add */
+ if (gtk_tree_model_get_iter(
+ model, &block_ptr->iter_ptr, path)) {
+ do {
+ /* search for the jobid and
+ check to see if it is in
+ the list */
+ gtk_tree_model_get(
+ model,
+ &block_ptr->iter_ptr,
+ SORTID_BLOCK,
+ &name, -1);
+ if (!strcmp(name,
+ block_ptr->bg_block_name)) {
+ /* update with new info */
+ g_free(name);
+ _update_block_record(
+ block_ptr,
+ GTK_TREE_STORE(model));
+ block_ptr->iter_set = 1;
+ break;
+ }
+ g_free(name);
+ } while (gtk_tree_model_iter_next(
+ model,
+ &block_ptr->iter_ptr));
+ }
+
+ if (!block_ptr->iter_set) {
+ _append_block_record(block_ptr,
+ GTK_TREE_STORE(model));
+ block_ptr->iter_set = true;
+ }
+ gtk_tree_path_free(path);
+ }
}
list_iterator_destroy(itr);
- if (host)
- free(host);
- gtk_tree_path_free(path);
/* remove all old blocks */
remove_old(model, SORTID_UPDATED);
+ last_model = model;
}
static int _sview_block_sort_aval_dec(sview_block_info_t* rec_a,
@@ -459,11 +538,9 @@
int size_a = rec_a->cnode_cnt;
int size_b = rec_b->cnode_cnt;
- if ((rec_a->job_running == NO_JOB_RUNNING)
- && (rec_b->job_running != NO_JOB_RUNNING))
+ if (list_count(rec_a->job_list) < list_count(rec_b->job_list))
return 1;
- else if ((rec_a->job_running != NO_JOB_RUNNING)
- && (rec_b->job_running == NO_JOB_RUNNING))
+ else if (list_count(rec_a->job_list) > list_count(rec_b->job_list))
return -1;
if ((rec_a->state == BG_BLOCK_FREE) && (rec_b->state != BG_BLOCK_FREE))
@@ -487,39 +564,72 @@
return 0;
}
-static List _create_block_list(partition_info_msg_t *part_info_ptr,
- block_info_msg_t *block_info_ptr,
- int changed)
+static void _set_block_partition(partition_info_msg_t *part_info_ptr,
+ sview_block_info_t *block_ptr)
{
- int i, j;
- static List block_list = NULL;
+ int j;
partition_info_t part;
+
+ for (j = 0; j < part_info_ptr->record_count; j++) {
+ part = part_info_ptr->partition_array[j];
+ if (_in_slurm_partition(part.node_inx,
+ block_ptr->mp_inx)) {
+ xfree(block_ptr->slurm_part_name);
+ block_ptr->slurm_part_name = xstrdup(part.name);
+ return;
+ }
+ }
+}
+
+static List _create_block_list(partition_info_msg_t *part_info_ptr,
+ block_info_msg_t *block_info_ptr)
+{
+ int i;
+ static List block_list = NULL;
+ static partition_info_msg_t *last_part_info_ptr = NULL;
+ static block_info_msg_t *last_block_info_ptr = NULL;
sview_block_info_t *block_ptr = NULL;
char tmp_mp_str[50];
- if (!changed && block_list) {
+ if (block_list && (part_info_ptr == last_part_info_ptr)
+ && (block_info_ptr == last_block_info_ptr))
return block_list;
- }
- if (block_list)
+ last_part_info_ptr = part_info_ptr;
+ if (block_list) {
+ /* Only the partition info changed so lets update just
+ that part.
+ */
+ if (block_info_ptr == last_block_info_ptr) {
+ ListIterator itr = list_iterator_create(block_list);
+
+ while ((block_ptr = list_next(itr)))
+ _set_block_partition(part_info_ptr, block_ptr);
+
+ return block_list;
+ }
list_flush(block_list);
- else
+ } else
block_list = list_create(_block_list_del);
if (!block_list) {
g_print("malloc error\n");
return NULL;
}
- for (i=0; i<block_info_ptr->record_count; i++) {
- block_ptr = xmalloc(sizeof(sview_block_info_t));
+ last_block_info_ptr = block_info_ptr;
+
+ for (i=0; i<block_info_ptr->record_count; i++) {
+ /* If we don't have a block name just continue since
+ ths block hasn't been made in the system yet. */
+ if (!block_info_ptr->block_array[i].bg_block_id)
+ continue;
+
+ block_ptr = xmalloc(sizeof(sview_block_info_t));
+ block_ptr->pos = i;
block_ptr->bg_block_name
= xstrdup(block_info_ptr->
block_array[i].bg_block_id);
- /* If we don't have a block name just continue since
- ths block hasn't been made in the system yet. */
- if (!block_ptr->bg_block_name)
- continue;
block_ptr->color_inx =
atoi(block_ptr->bg_block_name+7);
@@ -544,20 +654,25 @@
xfree(block_ptr->mp_str);
block_ptr->mp_str = xstrdup(tmp_mp_str);
}
+ block_ptr->reason
+ = xstrdup(block_info_ptr->block_array[i].reason);
- block_ptr->bg_user_name
- = xstrdup(block_info_ptr->
- block_array[i].owner_name);
- if (cluster_flags & CLUSTER_FLAG_BGL)
+ if (cluster_flags & CLUSTER_FLAG_BGP) {
+ block_ptr->imagelinux = xstrdup(
+ block_info_ptr->block_array[i].linuximage);
+ block_ptr->imageramdisk = xstrdup(
+ block_info_ptr->block_array[i].ramdiskimage);
+ } else if (cluster_flags & CLUSTER_FLAG_BGL) {
block_ptr->imageblrts = xstrdup(
block_info_ptr->block_array[i].blrtsimage);
+ block_ptr->imagelinux = xstrdup(
+ block_info_ptr->block_array[i].linuximage);
+ block_ptr->imageramdisk = xstrdup(
+ block_info_ptr->block_array[i].ramdiskimage);
+ }
- block_ptr->imagelinux = xstrdup(
- block_info_ptr->block_array[i].linuximage);
block_ptr->imagemloader = xstrdup(
block_info_ptr->block_array[i].mloaderimage);
- block_ptr->imageramdisk = xstrdup(
- block_info_ptr->block_array[i].ramdiskimage);
block_ptr->state
= block_info_ptr->block_array[i].state;
@@ -571,19 +686,17 @@
block_ptr->cnode_cnt
= block_info_ptr->block_array[i].cnode_cnt;
- block_ptr->bp_inx
+ block_ptr->cnode_err_cnt
+ = block_info_ptr->block_array[i].cnode_err_cnt;
+ block_ptr->mp_inx
= block_info_ptr->block_array[i].mp_inx;
- for(j = 0; j < part_info_ptr->record_count; j++) {
- part = part_info_ptr->partition_array[j];
- if (_in_slurm_partition(part.node_inx,
- block_ptr->bp_inx)) {
- block_ptr->slurm_part_name
- = xstrdup(part.name);
- break;
- }
- }
- block_ptr->job_running =
- block_info_ptr->block_array[i].job_running;
+ _set_block_partition(part_info_ptr, block_ptr);
+
+ block_ptr->job_list = list_create(slurm_free_block_job_info);
+ if (block_info_ptr->block_array[i].job_list)
+ list_transfer(block_ptr->job_list,
+ block_info_ptr->block_array[i].job_list);
+
if (block_ptr->bg_conn_type[0] >= SELECT_SMALL)
block_ptr->size = 0;
@@ -632,17 +745,17 @@
if (block_ptr->state & BG_BLOCK_ERROR_FLAG)
state = NODE_STATE_ERROR;
- else if (block_ptr->job_running > NO_JOB_RUNNING)
+ else if (list_count(block_ptr->job_list))
state = NODE_STATE_ALLOCATED;
else
state = NODE_STATE_IDLE;
j = 0;
- while (block_ptr->bp_inx[j] >= 0) {
+ while (block_ptr->mp_inx[j] >= 0) {
change_grid_color(
popup_win->grid_button_list,
- block_ptr->bp_inx[j],
- block_ptr->bp_inx[j+1],
+ block_ptr->mp_inx[j],
+ block_ptr->mp_inx[j+1],
block_ptr->color_inx, true,
state);
j += 2;
@@ -905,7 +1018,6 @@
GtkTreeView *tree_view = NULL;
static GtkWidget *display_widget = NULL;
List block_list = NULL;
- int changed = 1;
int j=0;
ListIterator itr = NULL;
sview_block_info_t *sview_block_info_ptr = NULL;
@@ -966,7 +1078,6 @@
|| (part_error_code != SLURM_NO_CHANGE_IN_DATA)) {
goto display_it;
}
- changed = 0;
} else if (block_error_code != SLURM_SUCCESS) {
if (view == ERROR_VIEW)
goto end_it;
@@ -1000,8 +1111,7 @@
if (!part_info_ptr)
goto reset_curs;
- block_list = _create_block_list(part_info_ptr, block_ptr,
- changed);
+ block_list = _create_block_list(part_info_ptr, block_ptr);
if (!block_list)
goto reset_curs;
@@ -1019,11 +1129,11 @@
itr = list_iterator_create(block_list);
while ((sview_block_info_ptr = list_next(itr))) {
j=0;
- while (sview_block_info_ptr->bp_inx[j] >= 0) {
+ while (sview_block_info_ptr->mp_inx[j] >= 0) {
change_grid_color(
grid_button_list,
- sview_block_info_ptr->bp_inx[j],
- sview_block_info_ptr->bp_inx[j+1],
+ sview_block_info_ptr->mp_inx[j],
+ sview_block_info_ptr->mp_inx[j+1],
sview_block_info_ptr->color_inx,
true, 0);
j += 2;
@@ -1083,7 +1193,6 @@
GtkTreeView *tree_view = NULL;
List block_list = NULL;
List send_block_list = NULL;
- int changed = 1;
sview_block_info_t *block_ptr = NULL;
int j=0, i=-1;
hostset_t hostset = NULL;
@@ -1128,8 +1237,6 @@
|| (part_error_code != SLURM_NO_CHANGE_IN_DATA)) {
goto display_it;
}
- changed = 0;
-
} else if (block_error_code != SLURM_SUCCESS) {
if (spec_info->view == ERROR_VIEW)
goto end_it;
@@ -1148,8 +1255,8 @@
}
display_it:
- block_list = _create_block_list(part_info_ptr, block_info_ptr,
- changed);
+ block_list = _create_block_list(part_info_ptr, block_info_ptr);
+
if (!block_list)
return;
@@ -1268,17 +1375,17 @@
if (block_ptr->state & BG_BLOCK_ERROR_FLAG)
state = NODE_STATE_ERROR;
- else if (block_ptr->job_running > NO_JOB_RUNNING)
+ else if (list_count(block_ptr->job_list))
state = NODE_STATE_ALLOCATED;
else
state = NODE_STATE_IDLE;
j=0;
- while (block_ptr->bp_inx[j] >= 0) {
+ while (block_ptr->mp_inx[j] >= 0) {
change_grid_color(
popup_win->grid_button_list,
- block_ptr->bp_inx[j],
- block_ptr->bp_inx[j+1], block_ptr->color_inx,
+ block_ptr->mp_inx[j],
+ block_ptr->mp_inx[j+1], block_ptr->color_inx,
true, state);
j += 2;
}
@@ -1359,7 +1466,7 @@
name);
break;
case NODE_PAGE:
- snprintf(title, 100, "Base Partition(s) in block %s", name);
+ snprintf(title, 100, "Midplane(s) in block %s", name);
break;
case SUBMIT_PAGE:
snprintf(title, 100, "Submit job on %s", name);
@@ -1496,7 +1603,37 @@
while (display_data++) {
if (display_data->id == -1)
break;
- if (cluster_flags & CLUSTER_FLAG_BGL) {
+ if (cluster_flags & CLUSTER_FLAG_BGQ) {
+ switch(display_data->id) {
+ case SORTID_USE:
+ case SORTID_USER:
+ case SORTID_IMAGEBLRTS:
+ case SORTID_IMAGELINUX:
+ case SORTID_IMAGERAMDISK:
+ display_data->name = NULL;
+ break;
+ default:
+ break;
+ }
+ } else if (cluster_flags & CLUSTER_FLAG_BGP) {
+ switch(display_data->id) {
+ case SORTID_USE:
+ case SORTID_IMAGEBLRTS:
+ display_data->name = NULL;
+ break;
+ case SORTID_IMAGELINUX:
+ display_data->name = "Image Cnload";
+ break;
+ case SORTID_IMAGERAMDISK:
+ display_data->name = "Image Ioload";
+ break;
+ case SORTID_USER:
+ display_data->name = "User";
+ break;
+ default:
+ break;
+ }
+ } else if (cluster_flags & CLUSTER_FLAG_BGL) {
switch(display_data->id) {
case SORTID_USE:
display_data->name = "Node Use";
@@ -1510,22 +1647,8 @@
case SORTID_IMAGERAMDISK:
display_data->name = "Image Ramdisk";
break;
- default:
- break;
- }
- } else {
- switch(display_data->id) {
- case SORTID_USE:
- display_data->name = NULL;
- break;
- case SORTID_IMAGEBLRTS:
- display_data->name = NULL;
- break;
- case SORTID_IMAGELINUX:
- display_data->name = "Image Cnload";
- break;
- case SORTID_IMAGERAMDISK:
- display_data->name = "Image Ioload";
+ case SORTID_USER:
+ display_data->name = "User";
break;
default:
break;
diff --git a/src/sview/common.c b/src/sview/common.c
index 5139678..6885c21 100644
--- a/src/sview/common.c
+++ b/src/sview/common.c
@@ -355,7 +355,7 @@
/* Translate a three-digit alpha-numeric value into it's
* base 36 equivalent number */
-static int _xlate_bp_coord(const char *name)
+static int _xlate_mp_coord(const char *name)
{
int i, rc = 0;
@@ -368,38 +368,52 @@
/* Make a BlueGene node name into a numeric representation of
* its location.
- * Value is low_node_coordinate * 1,000 + I/O node (999 if none)
- * with use of base 36 for the node coordinate:
+ * Value is (low_node_coordinate * io_val_max) + I/O node (io_val if none)
+ * with use of base 36 for the node coordinate on an L/P:
* (e.g. bg123[4] -> 1,371,004
* bg[234x235] -> 2,704,999
* bglZZZ -> 46,655,999
*/
-static int _bp_coordinate(const char *name)
+static int _mp_coordinate(const char *name)
{
- int i, io_val = 999, low_val = -1;
+ int i, io_val, io_val_max, low_val = -1;
+
+ /* Since io_val needs to handle all dimensions of the ionode
+ field with Q the number could be much bigger that 999.
+ This will have to be handled when a new system comes with
+ more dims.
+ */
+ if (cluster_flags & CLUSTER_FLAG_BGQ) {
+ io_val = 99999;
+ io_val_max = 100000;
+ } else {
+ io_val = 999;
+ io_val_max = 1000;
+ }
for (i=0; name[i]; i++) {
if (name[i] == '[') {
i++;
if (low_val < 0)
- low_val = _xlate_bp_coord(name+i);
+ low_val = _xlate_mp_coord(name+i);
else
io_val = atoi(name+i);
break;
} else if ((low_val < 0) &&
((name[i] >= '0' && (name[i] <= '9')) ||
(name[i] >= 'A' && (name[i] <= 'Z')))) {
- low_val = _xlate_bp_coord(name+i);
+ low_val = _xlate_mp_coord(name+i);
i += 2;
}
}
if (low_val < 0)
return low_val;
- return ((low_val * 1000) + io_val);
+
+ return ((low_val * io_val_max) + io_val);
}
-static int _sort_iter_compare_func_bp_list(GtkTreeModel *model,
+static int _sort_iter_compare_func_mp_list(GtkTreeModel *model,
GtkTreeIter *a,
GtkTreeIter *b,
gpointer userdata)
@@ -417,7 +431,7 @@
ret = (name1 == NULL) ? -1 : 1;
else {
/* Sort in numeric order based upon coordinates */
- ret = _bp_coordinate(name1) - _bp_coordinate(name2);
+ ret = _mp_coordinate(name1) - _mp_coordinate(name2);
}
cleanup:
g_free(name1);
@@ -954,13 +968,11 @@
replus(col_name);
if (strstr(col_name, "list")) {
char *orig_ptr = col_name;
- if (cluster_flags & CLUSTER_FLAG_BG) {
- xstrsubstitute(col_name, "node", "bp ");
- xstrsubstitute(col_name, "midplane", "bp ");
- } else {
- xstrsubstitute(col_name, "bp ", "node");
+ xstrsubstitute(col_name, "bp ", "midplane");
+ if (cluster_flags & CLUSTER_FLAG_BG)
+ xstrsubstitute(col_name, "node", "midplane");
+ else
xstrsubstitute(col_name, "midplane", "node");
- }
/* Make sure we have the correct pointer here
since xstrsubstitute() could of changed it
@@ -980,7 +992,8 @@
break;
if (!display_data->name)
continue;
- if (!strcasecmp(col_name, display_data->name)) {
+ if (!strncasecmp(col_name, display_data->name,
+ strlen(col_name))) {
display_data->show = TRUE;
break;
}
@@ -1254,11 +1267,11 @@
NULL);
break;
} else if (!strcasecmp(display_data[i].name,
- "BP List")) {
+ "MidplaneList")) {
gtk_tree_sortable_set_sort_func(
GTK_TREE_SORTABLE(treestore),
display_data[i].id,
- _sort_iter_compare_func_bp_list,
+ _sort_iter_compare_func_mp_list,
GINT_TO_POINTER(display_data[i].id),
NULL);
break;
@@ -1874,13 +1887,35 @@
return NULL;
}
+extern void set_for_update(GtkTreeModel *model, int updated)
+{
+ GtkTreePath *path = gtk_tree_path_new_first();
+ GtkTreeIter iter;
+
+ /* mark all current rows as in need of an update. */
+ if (path && gtk_tree_model_get_iter(model, &iter, path)) {
+ /* This process will make sure all iter's in the
+ * tree_store will be mark as needing to be updated.
+ * If it is still 0 after the update then it is old
+ * data and will be removed with remove_old()
+ */
+ while (1) {
+ gtk_tree_store_set(GTK_TREE_STORE(model), &iter,
+ updated, 0, -1);
+ if (!gtk_tree_model_iter_next(model, &iter)) {
+ break;
+ }
+ }
+ }
+}
+
extern void remove_old(GtkTreeModel *model, int updated)
{
GtkTreePath *path = gtk_tree_path_new_first();
GtkTreeIter iter;
int i;
- /* remove all old partitions */
+ /* remove all old objects */
if (gtk_tree_model_get_iter(model, &iter, path)) {
while (1) {
gtk_tree_model_get(model, &iter, updated, &i, -1);
diff --git a/src/sview/defaults.c b/src/sview/defaults.c
index afd5cd1..16d17e6 100644
--- a/src/sview/defaults.c
+++ b/src/sview/defaults.c
@@ -506,7 +506,7 @@
return rc;
}
-static void _init_sview_conf()
+static void _init_sview_conf(void)
{
int i;
diff --git a/src/sview/front_end_info.c b/src/sview/front_end_info.c
index 73c12c1..d5baaf1 100644
--- a/src/sview/front_end_info.c
+++ b/src/sview/front_end_info.c
@@ -187,12 +187,12 @@
gtk_tree_store_set(treestore, iter,
SORTID_BOOT_TIME,
sview_front_end_info_ptr->boot_time,
- SORTID_COLOR,
- sview_colors[sview_front_end_info_ptr->color_inx],
+ SORTID_COLOR, sview_colors[
+ sview_front_end_info_ptr->color_inx],
SORTID_COLOR_INX,
sview_front_end_info_ptr->color_inx,
SORTID_NODE_INX,
- sview_front_end_info_ptr->node_inx,
+ sview_front_end_info_ptr->node_inx,
SORTID_NAME, front_end_ptr->name,
SORTID_REASON, sview_front_end_info_ptr->reason,
SORTID_SLURMD_START_TIME,
@@ -235,7 +235,7 @@
}
itr = list_iterator_create(info_list);
- while ((sview_front_end_info = (sview_front_end_info_t*) list_next(itr))) {
+ while ((sview_front_end_info = list_next(itr))) {
front_end_ptr = sview_front_end_info->front_end_ptr;
/* get the iter, or find out the list is empty goto add */
if (!gtk_tree_model_get_iter(model, &iter, path)) {
@@ -263,8 +263,9 @@
}
}
adding:
- _append_front_end_record(sview_front_end_info, GTK_TREE_STORE(model),
- &iter, line);
+ _append_front_end_record(sview_front_end_info,
+ GTK_TREE_STORE(model),
+ &iter, line);
found:
;
}
@@ -277,8 +278,8 @@
return;
}
-static List _create_front_end_info_list(front_end_info_msg_t *front_end_info_ptr,
- int changed)
+static List _create_front_end_info_list(
+ front_end_info_msg_t *front_end_info_ptr, int changed)
{
char *upper = NULL;
char user[32], time_str[32];
@@ -390,7 +391,7 @@
while (sview_fe_info->node_inx[j] >= 0) {
change_grid_color(popup_win->grid_button_list,
sview_fe_info->node_inx[j],
- sview_fe_info->node_inx[j + 1],
+ sview_fe_info->node_inx[j+1],
sview_fe_info->color_inx,
true, 0);
j += 2;
@@ -822,10 +823,12 @@
switch (type) {
case TAB_CLICKED:
- make_fields_menu(NULL, menu, display_data_front_end, SORTID_CNT);
+ make_fields_menu(NULL, menu, display_data_front_end,
+ SORTID_CNT);
break;
case ROW_CLICKED:
- make_options_menu(tree_view, path, menu, options_data_front_end);
+ make_options_menu(tree_view, path, menu,
+ options_data_front_end);
break;
case ROW_LEFT_CLICKED:
highlight_grid(tree_view, SORTID_NODE_INX,
@@ -1069,7 +1072,7 @@
display_data->name = "Blocks";
break;
case NODE_PAGE:
- display_data->name = "Base Partitions";
+ display_data->name = "Midplanes";
break;
}
} else {
diff --git a/src/sview/grid.c b/src/sview/grid.c
index 9ccb4d2..4d1e359 100644
--- a/src/sview/grid.c
+++ b/src/sview/grid.c
@@ -481,11 +481,11 @@
}
-static int _block_in_node(int *bp_inx, int inx)
+static int _block_in_node(int *mp_inx, int inx)
{
int j=0;
- if (bp_inx[j] >= 0) {
- if ((bp_inx[j] == inx) && (bp_inx[j+1] == inx))
+ if (mp_inx[j] >= 0) {
+ if ((mp_inx[j] == inx) && (mp_inx[j+1] == inx))
return 1;
}
return 0;
@@ -1363,7 +1363,7 @@
}
if (bg_info_ptr->state & BG_BLOCK_ERROR_FLAG)
grid_button->state = NODE_STATE_ERROR;
- else if (bg_info_ptr->job_running > NO_JOB_RUNNING)
+ else if (list_count(bg_info_ptr->job_list))
grid_button->state = NODE_STATE_ALLOCATED;
else
grid_button->state = NODE_STATE_IDLE;
@@ -1583,18 +1583,15 @@
int rc = SLURM_SUCCESS;
node_info_msg_t *node_info_ptr = NULL;
List node_list = NULL;
- int changed = 1;
if ((rc = get_new_info_node(&node_info_ptr, force_refresh))
== SLURM_NO_CHANGE_IN_DATA) {
- changed = 0;
} else if (rc != SLURM_SUCCESS)
return SLURM_ERROR;
select_g_ba_init(node_info_ptr, 0);
- node_list = create_node_info_list(node_info_ptr,
- changed, FALSE);
+ node_list = create_node_info_list(node_info_ptr, FALSE);
if (grid_button_list) {
rc = update_grid_table(main_grid_table, grid_button_list,
node_list);
diff --git a/src/sview/job_info.c b/src/sview/job_info.c
index 05bf164..6773ca9 100644
--- a/src/sview/job_info.c
+++ b/src/sview/job_info.c
@@ -53,9 +53,12 @@
/* Collection of data for printing reports. Like data is combined here */
typedef struct {
int color_inx;
+ GtkTreeIter iter_ptr;
+ bool iter_set;
job_info_t *job_ptr;
int node_cnt;
char *nodes;
+ int pos;
bool small_block;
List step_list;
} sview_job_info_t;
@@ -96,11 +99,6 @@
SORTID_ALPS_RESV_ID,
SORTID_BATCH,
SORTID_BATCH_HOST,
-#ifdef HAVE_BG
- SORTID_NODELIST,
- SORTID_NODELIST_EXC,
- SORTID_NODELIST_REQ,
-#endif
SORTID_BLOCK,
SORTID_COLOR,
SORTID_COLOR_INX,
@@ -136,6 +134,11 @@
SORTID_CPU_REQ,
SORTID_MEM_MIN,
SORTID_TMP_DISK,
+#ifdef HAVE_BG
+ SORTID_NODELIST,
+ SORTID_NODELIST_EXC,
+ SORTID_NODELIST_REQ,
+#endif
SORTID_NAME,
SORTID_NETWORK,
SORTID_NICE,
@@ -293,11 +296,11 @@
{G_TYPE_STRING, SORTID_CPUS, "CPU Count",
FALSE, EDIT_NONE, refresh_job, create_model_job, admin_edit_job},
#ifdef HAVE_BG
- {G_TYPE_STRING, SORTID_NODELIST, "BP List", FALSE, EDIT_NONE,
+ {G_TYPE_STRING, SORTID_NODELIST, "MidplaneList", FALSE, EDIT_NONE,
refresh_job, create_model_job, admin_edit_job},
- {G_TYPE_STRING, SORTID_NODELIST_EXC, "BP List Excluded",
+ {G_TYPE_STRING, SORTID_NODELIST_EXC, "MidplaneList Excluded",
FALSE, EDIT_TEXTBOX, refresh_job, create_model_job, admin_edit_job},
- {G_TYPE_STRING, SORTID_NODELIST_REQ, "BP List Requested",
+ {G_TYPE_STRING, SORTID_NODELIST_REQ, "MidplaneList Requested",
FALSE, EDIT_TEXTBOX, refresh_job, create_model_job, admin_edit_job},
#else
{G_TYPE_STRING, SORTID_NODELIST, "NodeList", FALSE,
@@ -416,7 +419,7 @@
{G_TYPE_STRING, PART_PAGE, "Partition", TRUE, JOB_PAGE},
#ifdef HAVE_BG
{G_TYPE_STRING, BLOCK_PAGE, "Block", TRUE, JOB_PAGE},
- {G_TYPE_STRING, NODE_PAGE, "Base Partitions", TRUE, JOB_PAGE},
+ {G_TYPE_STRING, NODE_PAGE, "Midplanes", TRUE, JOB_PAGE},
#else
{G_TYPE_STRING, BLOCK_PAGE, NULL, TRUE, JOB_PAGE},
{G_TYPE_STRING, NODE_PAGE, "Nodes", TRUE, JOB_PAGE},
@@ -550,14 +553,12 @@
int error_code = SLURM_SUCCESS, i;
char *temp = NULL;
- for (i=0; i<MAX_CANCEL_RETRY; i++) {
- if ((signal == (uint16_t)-1) || (signal == SIGKILL)) {
- signal = 9;
- error_code = slurm_kill_job(job_id, SIGKILL,
- false);
- } else
- error_code = slurm_signal_job(job_id, signal);
-
+ if (signal == (uint16_t)-1)
+ signal = SIGKILL;
+ for (i = 0; i < MAX_CANCEL_RETRY; i++) {
+ /* NOTE: RPC always sent to slurmctld rather than directly
+ * to slurmd daemons */
+ error_code = slurm_kill_job(job_id, signal, false);
if (error_code == 0
|| (errno != ESLURM_TRANSITION_STATE_NO_UPDATE
&& errno != ESLURM_JOB_PENDING))
@@ -591,13 +592,17 @@
int error_code = SLURM_SUCCESS, i;
char *temp = NULL;
- for (i=0; i<MAX_CANCEL_RETRY; i++) {
- if (signal == (uint16_t)-1 || (signal == SIGKILL)) {
- signal = 9;
+ if (signal == (uint16_t)-1)
+ signal = SIGKILL;
+ for (i = 0; i < MAX_CANCEL_RETRY; i++) {
+ /* NOTE: RPC always sent to slurmctld rather than directly
+ * to slurmd daemons */
+ if (signal == SIGKILL) {
error_code = slurm_terminate_job_step(job_id, step_id);
+
} else {
- error_code = slurm_signal_job_step(job_id, step_id,
- signal);
+ error_code = slurm_kill_job_step(job_id, step_id,
+ signal);
}
if (error_code == 0
|| (errno != ESLURM_TRANSITION_STATE_NO_UPDATE
@@ -831,11 +836,12 @@
job_msg->max_nodes = (uint32_t)temp_int;
break;
case SORTID_MEM_MIN:
- temp_int = strtol(new_text, (char **)NULL, 10);
- if (*p == 'k' || *p == 'K')
+ temp_int = strtol(new_text, &p, 10);
+ if (*p == 'g' || *p == 'G')
temp_int *= 1024;
- else if (*p == 'm' || *p == 'M')
+ else if (*p == 't' || *p == 'T')
temp_int *= 1048576;
+
p = slurm_strcasestr((char *)new_text, "cpu");
if (p)
type = "min memory per cpu";
@@ -849,10 +855,10 @@
job_msg->pn_min_memory |= MEM_PER_CPU;
break;
case SORTID_TMP_DISK:
- temp_int = strtol(new_text, (char **)NULL, 10);
- if (*p == 'k' || *p == 'K')
+ temp_int = strtol(new_text, &p, 10);
+ if (*p == 'g' || *p == 'G')
temp_int *= 1024;
- else if (*p == 'm' || *p == 'M')
+ else if (*p == 't' || *p == 'T')
temp_int *= 1048576;
type = "min tmp disk per node";
@@ -1207,7 +1213,17 @@
{
int node_cnt = 0;
- if (IS_JOB_PENDING(job) || IS_JOB_COMPLETING(job)) {
+ /* For PENDING jobs, return the maximum of the requested nodelist,
+ * requested maximum number of nodes, or requested CPUs rounded
+ * to nearest node.
+ *
+ * For COMPLETING jobs, the job->nodes nodelist has already been
+ * altered to list only the nodes still in the comp state, and
+ * thus we count only those nodes toward the total nodes still
+ * allocated to this job.
+ */
+
+ if (IS_JOB_PENDING(job)) {
node_cnt = _nodes_in_list(job->req_nodes);
node_cnt = MAX(node_cnt, job->num_nodes);
} else
@@ -1793,8 +1809,7 @@
}
static void _update_job_record(sview_job_info_t *sview_job_info_ptr,
- GtkTreeStore *treestore,
- GtkTreeIter *iter)
+ GtkTreeStore *treestore)
{
char tmp_time_run[40], tmp_time_resize[40], tmp_time_submit[40];
char tmp_time_elig[40], tmp_time_start[40], tmp_time_end[40];
@@ -2006,7 +2021,7 @@
tmp_uname = uid_to_string((uid_t)job_ptr->user_id);
- gtk_tree_store_set(treestore, iter,
+ gtk_tree_store_set(treestore, &sview_job_info_ptr->iter_ptr,
SORTID_ACCOUNT, job_ptr->account,
SORTID_ALLOC, 1,
SORTID_ALLOC_NODE, tmp_alloc_node,
@@ -2072,7 +2087,7 @@
xfree(tmp_uname);
if (cluster_flags & CLUSTER_FLAG_AIX) {
- gtk_tree_store_set(treestore, iter,
+ gtk_tree_store_set(treestore, &sview_job_info_ptr->iter_ptr,
SORTID_NETWORK, job_ptr->network, -1);
}
@@ -2108,7 +2123,7 @@
tmp_rotate, sizeof(tmp_rotate),
SELECT_PRINT_ROTATE);
- gtk_tree_store_set(treestore, iter,
+ gtk_tree_store_set(treestore, &sview_job_info_ptr->iter_ptr,
SORTID_BLOCK, tmp_block,
SORTID_CONNECTION, tmp_conn,
SORTID_GEOMETRY, tmp_geo,
@@ -2128,7 +2143,7 @@
tmp_blrts, sizeof(tmp_blrts),
SELECT_PRINT_BLRTS_IMAGE);
- gtk_tree_store_set(treestore, iter,
+ gtk_tree_store_set(treestore, &sview_job_info_ptr->iter_ptr,
SORTID_IMAGE_BLRTS, tmp_blrts,
-1);
}
@@ -2140,23 +2155,26 @@
tmp_resv_id, sizeof(tmp_resv_id),
SELECT_PRINT_DATA);
- gtk_tree_store_set(treestore, iter,
+ gtk_tree_store_set(treestore, &sview_job_info_ptr->iter_ptr,
SORTID_ALPS_RESV_ID, tmp_resv_id,
-1);
}
if (gtk_tree_model_iter_children(GTK_TREE_MODEL(treestore),
- &step_iter, iter))
+ &step_iter,
+ &sview_job_info_ptr->iter_ptr))
_update_info_step(sview_job_info_ptr,
- GTK_TREE_MODEL(treestore), &step_iter, iter);
+ GTK_TREE_MODEL(treestore), &step_iter,
+ &sview_job_info_ptr->iter_ptr);
else
_update_info_step(sview_job_info_ptr,
- GTK_TREE_MODEL(treestore), NULL, iter);
+ GTK_TREE_MODEL(treestore), NULL,
+ &sview_job_info_ptr->iter_ptr);
return;
}
-static void _get_step_nodelist(job_step_info_t *step_ptr, char *buf,
+static void _get_step_nodelist(job_step_info_t *step_ptr, char *buf,
int buf_size)
{
char *ionodes = NULL;
@@ -2378,12 +2396,12 @@
}
static void _append_job_record(sview_job_info_t *sview_job_info_ptr,
- GtkTreeStore *treestore, GtkTreeIter *iter,
- int line)
+ GtkTreeStore *treestore)
{
- gtk_tree_store_append(treestore, iter, NULL);
- gtk_tree_store_set(treestore, iter, SORTID_POS, line, -1);
- _update_job_record(sview_job_info_ptr, treestore, iter);
+ gtk_tree_store_append(treestore, &sview_job_info_ptr->iter_ptr, NULL);
+ gtk_tree_store_set(treestore, &sview_job_info_ptr->iter_ptr, SORTID_POS,
+ sview_job_info_ptr->pos, -1);
+ _update_job_record(sview_job_info_ptr, treestore);
}
static void _append_step_record(job_step_info_t *step_ptr,
@@ -2484,65 +2502,76 @@
static void _update_info_job(List info_list,
GtkTreeView *tree_view)
{
- GtkTreePath *path = gtk_tree_path_new_first();
GtkTreeModel *model = gtk_tree_view_get_model(tree_view);
- GtkTreeIter iter;
+ static GtkTreeModel *last_model = NULL;
int jobid = 0;
job_info_t *job_ptr = NULL;
- int line = 0;
- char *host = NULL;
ListIterator itr = NULL;
sview_job_info_t *sview_job_info = NULL;
- /* make sure all the jobs are still here */
- if (gtk_tree_model_get_iter(model, &iter, path)) {
- while (1) {
- gtk_tree_store_set(GTK_TREE_STORE(model), &iter,
- SORTID_UPDATED, 0, -1);
- if (!gtk_tree_model_iter_next(model, &iter)) {
- break;
- }
- }
- }
+ set_for_update(model, SORTID_UPDATED);
itr = list_iterator_create(info_list);
while ((sview_job_info = (sview_job_info_t*) list_next(itr))) {
job_ptr = sview_job_info->job_ptr;
- /* get the iter, or find out the list is empty goto add */
- if (!gtk_tree_model_get_iter(model, &iter, path)) {
- goto adding;
+
+ /* This means the tree_store changed (added new column
+ or something). */
+ if (last_model != model)
+ sview_job_info->iter_set = false;
+
+ if (sview_job_info->iter_set) {
+ gtk_tree_model_get(model, &sview_job_info->iter_ptr,
+ SORTID_JOBID, &jobid, -1);
+ if (jobid != job_ptr->job_id) /* Bad pointer */
+ sview_job_info->iter_set = false;
}
- line = 0;
- while (1) {
- /* search for the jobid and check to see if
- it is in the list */
- gtk_tree_model_get(model, &iter, SORTID_JOBID,
- &jobid, -1);
- if (jobid == job_ptr->job_id) {
- /* update with new info */
- _update_job_record(sview_job_info,
- GTK_TREE_STORE(model),
- &iter);
- goto found;
+ if (sview_job_info->iter_set)
+ _update_job_record(sview_job_info,
+ GTK_TREE_STORE(model));
+ else {
+ GtkTreePath *path = gtk_tree_path_new_first();
+
+ /* get the iter, or find out the list is empty
+ * goto add */
+ if (gtk_tree_model_get_iter(
+ model, &sview_job_info->iter_ptr, path)) {
+ do {
+ /* search for the jobid and
+ check to see if it is in
+ the list */
+ gtk_tree_model_get(
+ model,
+ &sview_job_info->iter_ptr,
+ SORTID_JOBID,
+ &jobid, -1);
+ if (jobid == job_ptr->job_id) {
+ /* update with new info */
+ _update_job_record(
+ sview_job_info,
+ GTK_TREE_STORE(model));
+ sview_job_info->iter_set = 1;
+ break;
+ }
+ } while (gtk_tree_model_iter_next(
+ model,
+ &sview_job_info->iter_ptr));
}
- line++;
- if (!gtk_tree_model_iter_next(model, &iter)) {
- break;
+ if (!sview_job_info->iter_set) {
+ _append_job_record(sview_job_info,
+ GTK_TREE_STORE(model));
+ sview_job_info->iter_set = true;
}
+
+ gtk_tree_path_free(path);
}
- adding:
- _append_job_record(sview_job_info, GTK_TREE_STORE(model),
- &iter, line);
- found:
- ;
}
list_iterator_destroy(itr);
- if (host)
- free(host);
- gtk_tree_path_free(path);
+
/* remove all old jobs */
remove_old(model, SORTID_UPDATED);
+ last_model = model;
return;
}
@@ -2569,10 +2598,12 @@
static List _create_job_info_list(job_info_msg_t *job_info_ptr,
job_step_info_response_msg_t *step_info_ptr,
- int changed, int want_odd_states)
+ int want_odd_states)
{
static List info_list = NULL;
static List odd_info_list = NULL;
+ static job_info_msg_t *last_job_info_ptr = NULL;
+ static job_step_info_response_msg_t *last_step_info_ptr = NULL;
int i = 0, j = 0;
sview_job_info_t *sview_job_info_ptr = NULL;
job_info_t *job_ptr = NULL;
@@ -2580,9 +2611,12 @@
char *ionodes = NULL;
char tmp_char[50];
- if (!changed && info_list) {
+ if (info_list && (job_info_ptr == last_job_info_ptr)
+ && (step_info_ptr == last_step_info_ptr))
goto update_color;
- }
+
+ last_job_info_ptr = job_info_ptr;
+ last_step_info_ptr = step_info_ptr;
if (info_list) {
list_flush(info_list);
@@ -2602,6 +2636,7 @@
sview_job_info_ptr = xmalloc(sizeof(sview_job_info_t));
sview_job_info_ptr->job_ptr = job_ptr;
sview_job_info_ptr->step_list = list_create(NULL);
+ sview_job_info_ptr->pos = i;
sview_job_info_ptr->node_cnt = 0;
sview_job_info_ptr->color_inx =
job_ptr->job_id % sview_colors_cnt;
@@ -3154,7 +3189,6 @@
GtkTreeView *tree_view = NULL;
static GtkWidget *display_widget = NULL;
List info_list = NULL;
- int changed = 1;
int j, k;
sview_job_info_t *sview_job_info_ptr = NULL;
job_info_t *job_ptr = NULL;
@@ -3213,7 +3247,6 @@
if ((!display_widget || view == ERROR_VIEW)
|| (job_error_code != SLURM_NO_CHANGE_IN_DATA))
goto display_it;
- changed = 0;
} else if (step_error_code != SLURM_SUCCESS) {
if (view == ERROR_VIEW)
goto end_it;
@@ -3230,8 +3263,7 @@
}
display_it:
- info_list = _create_job_info_list(job_info_ptr, step_info_ptr,
- changed, 0);
+ info_list = _create_job_info_list(job_info_ptr, step_info_ptr, 0);
if (!info_list)
goto reset_curs;
@@ -3341,7 +3373,6 @@
GtkTreeView *tree_view = NULL;
List info_list = NULL;
List send_info_list = NULL;
- int changed = 1;
int i=-1, j, k;
sview_job_info_t *sview_job_info_ptr = NULL;
job_info_t *job_ptr = NULL;
@@ -3390,7 +3421,6 @@
|| spec_info->view == ERROR_VIEW)
|| (job_error_code != SLURM_NO_CHANGE_IN_DATA))
goto display_it;
- changed = 0;
} else if (step_error_code != SLURM_SUCCESS) {
if (spec_info->view == ERROR_VIEW)
goto end_it;
@@ -3407,8 +3437,7 @@
goto end_it;
}
display_it:
- info_list = _create_job_info_list(job_info_ptr, step_info_ptr,
- changed, 1);
+ info_list = _create_job_info_list(job_info_ptr, step_info_ptr, 1);
if (!info_list)
return;
@@ -3631,7 +3660,7 @@
char *type;
if (cluster_flags & CLUSTER_FLAG_BG)
- type = "Base partition";
+ type = "Midplane";
else
type = "Node";
@@ -4238,13 +4267,13 @@
display_data->name = "Image Mloader";
break;
case SORTID_NODELIST:
- display_data->name = "BP List";
+ display_data->name = "MidplaneList";
break;
case SORTID_NODELIST_EXC:
- display_data->name = "BP List Excluded";
+ display_data->name = "MidplaneList Excluded";
break;
case SORTID_NODELIST_REQ:
- display_data->name = "BP List Requested";
+ display_data->name = "MidplaneList Requested";
break;
default:
break;
@@ -4323,7 +4352,7 @@
display_data->name = "Blocks";
break;
case NODE_PAGE:
- display_data->name = "Base Partitions";
+ display_data->name = "Midplanes";
break;
}
} else {
diff --git a/src/sview/node_info.c b/src/sview/node_info.c
index 9f2f273..cbaa334 100644
--- a/src/sview/node_info.c
+++ b/src/sview/node_info.c
@@ -54,6 +54,7 @@
SORTID_NODE_HOSTNAME,
SORTID_MEMORY, /* RealMemory */
SORTID_REASON,
+ SORTID_RACK_MP,
SORTID_SLURMD_START_TIME,
SORTID_SOCKETS,
SORTID_STATE,
@@ -75,35 +76,43 @@
/*these are the settings to apply for the user
* on the first startup after a fresh slurm install.*/
-static char *_initial_page_opts = "Name,State,CPU_Count,Used_CPU_Count,"
- "Error_CPU_Count,Cores,Sockets,Threads,Real_Memory,Tmp_Disk";
+static char *_initial_page_opts = "Name,RackMidplane,State,CPU_Count,"
+ "Used_CPU_Count,Error_CPU_Count,CoresPerSocket,Sockets,ThreadsPerCore,"
+ "Real_Memory,Tmp_Disk";
static display_data_t display_data_node[] = {
{G_TYPE_INT, SORTID_POS, NULL, FALSE, EDIT_NONE, refresh_node,
create_model_node, admin_edit_node},
{G_TYPE_STRING, SORTID_NAME, "Name", FALSE, EDIT_NONE, refresh_node,
create_model_node, admin_edit_node},
+ {G_TYPE_STRING, SORTID_COLOR, NULL, TRUE, EDIT_COLOR, refresh_node,
+ create_model_node, admin_edit_node},
+#ifdef HAVE_BG
+ {G_TYPE_STRING, SORTID_RACK_MP, "RackMidplane", FALSE, EDIT_NONE,
+ refresh_node, create_model_node, admin_edit_node},
+#else
+ {G_TYPE_STRING, SORTID_RACK_MP, NULL, TRUE, EDIT_NONE, refresh_node,
+ create_model_node, admin_edit_node},
+#endif
{G_TYPE_STRING, SORTID_NODE_ADDR, "NodeAddr", FALSE, EDIT_NONE,
refresh_node, create_model_node, admin_edit_node},
{G_TYPE_STRING, SORTID_NODE_HOSTNAME, "NodeHostName", FALSE, EDIT_NONE,
refresh_node, create_model_node, admin_edit_node},
- {G_TYPE_STRING, SORTID_COLOR, NULL, TRUE, EDIT_COLOR, refresh_node,
- create_model_node, admin_edit_node},
{G_TYPE_STRING, SORTID_STATE, "State", FALSE, EDIT_MODEL, refresh_node,
create_model_node, admin_edit_node},
{G_TYPE_INT, SORTID_STATE_NUM, NULL, FALSE, EDIT_NONE, refresh_node,
create_model_node, admin_edit_node},
- {G_TYPE_INT, SORTID_CPUS, "CPU Count", FALSE, EDIT_NONE, refresh_node,
- create_model_node, admin_edit_node},
+ {G_TYPE_STRING, SORTID_CPUS, "CPU Count", FALSE,
+ EDIT_NONE, refresh_node, create_model_node, admin_edit_node},
{G_TYPE_STRING, SORTID_USED_CPUS, "Used CPU Count", FALSE,
EDIT_NONE, refresh_node, create_model_node, admin_edit_node},
{G_TYPE_STRING, SORTID_ERR_CPUS, "Error CPU Count", FALSE,
EDIT_NONE, refresh_node, create_model_node, admin_edit_node},
- {G_TYPE_INT, SORTID_CORES, "Cores", FALSE,
+ {G_TYPE_INT, SORTID_CORES, "CoresPerSocket", FALSE,
EDIT_NONE, refresh_node, create_model_node, admin_edit_node},
{G_TYPE_INT, SORTID_SOCKETS, "Sockets", FALSE,
EDIT_NONE, refresh_node, create_model_node, admin_edit_node},
- {G_TYPE_INT, SORTID_THREADS, "Threads", FALSE,
+ {G_TYPE_INT, SORTID_THREADS, "ThreadsPerCore", FALSE,
EDIT_NONE, refresh_node, create_model_node, admin_edit_node},
{G_TYPE_STRING, SORTID_MEMORY, "Real Memory", FALSE,
EDIT_NONE, refresh_node, create_model_node, admin_edit_node},
@@ -132,11 +141,11 @@
{G_TYPE_INT, SORTID_POS, NULL, FALSE, EDIT_NONE},
{G_TYPE_STRING, INFO_PAGE, "Full Info", TRUE, NODE_PAGE},
#ifdef HAVE_BG
- {G_TYPE_STRING, NODE_PAGE, "Drain Base Partition", TRUE, ADMIN_PAGE},
- {G_TYPE_STRING, NODE_PAGE, "Resume Base Partition", TRUE, ADMIN_PAGE},
- {G_TYPE_STRING, NODE_PAGE, "Set Base Partition Down",
+ {G_TYPE_STRING, NODE_PAGE, "Drain Midplane", TRUE, ADMIN_PAGE},
+ {G_TYPE_STRING, NODE_PAGE, "Resume Midplane", TRUE, ADMIN_PAGE},
+ {G_TYPE_STRING, NODE_PAGE, "Set Midplane Down",
TRUE, ADMIN_PAGE},
- {G_TYPE_STRING, NODE_PAGE, "Make Base Partition Idle",
+ {G_TYPE_STRING, NODE_PAGE, "Make Midplane Idle",
TRUE, ADMIN_PAGE},
#else
{G_TYPE_STRING, NODE_PAGE, "Drain Node", TRUE, ADMIN_PAGE},
@@ -180,6 +189,12 @@
SORTID_NAME),
node_ptr->name);
+ if (sview_node_info_ptr->rack_mp)
+ add_display_treestore_line(update, treestore, &iter,
+ find_col_name(display_data_node,
+ SORTID_RACK_MP),
+ sview_node_info_ptr->rack_mp);
+
add_display_treestore_line(update, treestore, &iter,
find_col_name(display_data_node,
SORTID_NODE_ADDR),
@@ -309,13 +324,17 @@
}
static void _update_node_record(sview_node_info_t *sview_node_info_ptr,
- GtkTreeStore *treestore, GtkTreeIter *iter)
+ GtkTreeStore *treestore)
{
uint16_t alloc_cpus = 0, err_cpus = 0, idle_cpus;
node_info_t *node_ptr = sview_node_info_ptr->node_ptr;
- char tmp_disk[20], tmp_err_cpus[20], tmp_mem[20], tmp_used_cpus[20];
+ char tmp_disk[20], tmp_cpus[20], tmp_err_cpus[20],
+ tmp_mem[20], tmp_used_cpus[20];
char *tmp_state_lower, *tmp_state_upper;
+ convert_num_unit((float)node_ptr->cpus, tmp_cpus,
+ sizeof(tmp_cpus), UNIT_NONE);
+
select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
SELECT_NODEDATA_SUBCNT,
NODE_STATE_ALLOCATED,
@@ -361,14 +380,14 @@
/* Combining these records provides a slight performance improvement */
- gtk_tree_store_set(treestore, iter,
+ gtk_tree_store_set(treestore, &sview_node_info_ptr->iter_ptr,
SORTID_ARCH, node_ptr->arch,
SORTID_BOOT_TIME, sview_node_info_ptr->boot_time,
SORTID_COLOR,
sview_colors[sview_node_info_ptr->pos
% sview_colors_cnt],
- SORTID_CORES, node_ptr->cpus,
- SORTID_CPUS, node_ptr->cpus,
+ SORTID_CORES, node_ptr->cores,
+ SORTID_CPUS, tmp_cpus,
SORTID_DISK, tmp_disk,
SORTID_ERR_CPUS, tmp_err_cpus,
SORTID_FEATURES, node_ptr->features,
@@ -377,6 +396,7 @@
SORTID_NAME, node_ptr->name,
SORTID_NODE_ADDR, node_ptr->node_addr,
SORTID_NODE_HOSTNAME, node_ptr->node_hostname,
+ SORTID_RACK_MP, sview_node_info_ptr->rack_mp,
SORTID_REASON, sview_node_info_ptr->reason,
SORTID_SLURMD_START_TIME,
sview_node_info_ptr->slurmd_start_time,
@@ -395,39 +415,34 @@
}
static void _append_node_record(sview_node_info_t *sview_node_info,
- GtkTreeStore *treestore, GtkTreeIter *iter)
+ GtkTreeStore *treestore)
{
- gtk_tree_store_append(treestore, iter, NULL);
- gtk_tree_store_set(treestore, iter, SORTID_POS,
+ gtk_tree_store_append(treestore, &sview_node_info->iter_ptr, NULL);
+ gtk_tree_store_set(treestore, &sview_node_info->iter_ptr, SORTID_POS,
sview_node_info->pos, -1);
- _update_node_record(sview_node_info, treestore, iter);
+ _update_node_record(sview_node_info, treestore);
}
static void _update_info_node(List info_list, GtkTreeView *tree_view)
{
- GtkTreePath *path = gtk_tree_path_new_first();
GtkTreeModel *model = gtk_tree_view_get_model(tree_view);
- GtkTreeIter iter;
+ static GtkTreeModel *last_model = NULL;
node_info_t *node_ptr = NULL;
char *name;
ListIterator itr = NULL;
sview_node_info_t *sview_node_info = NULL;
- /* get the iter, or find out the list is empty goto add */
- if (gtk_tree_model_get_iter(model, &iter, path)) {
- /* make sure all the partitions are still here */
- while (1) {
- gtk_tree_store_set(GTK_TREE_STORE(model), &iter,
- SORTID_UPDATED, 0, -1);
- if (!gtk_tree_model_iter_next(model, &iter)) {
- break;
- }
- }
- }
+ set_for_update(model, SORTID_UPDATED);
itr = list_iterator_create(info_list);
while ((sview_node_info = (sview_node_info_t*) list_next(itr))) {
node_ptr = sview_node_info->node_ptr;
+
+ /* This means the tree_store changed (added new column
+ or something). */
+ if (last_model != model)
+ sview_node_info->iter_set = false;
+
if (sview_node_info->iter_set) {
gtk_tree_model_get(model, &sview_node_info->iter_ptr,
SORTID_NAME, &name, -1);
@@ -435,23 +450,55 @@
sview_node_info->iter_set = false;
//g_print("bad node iter pointer\n");
}
+ g_free(name);
}
if (sview_node_info->iter_set) {
_update_node_record(sview_node_info,
- GTK_TREE_STORE(model),
- &sview_node_info->iter_ptr);
+ GTK_TREE_STORE(model));
} else {
- _append_node_record(sview_node_info,
- GTK_TREE_STORE(model),
- &sview_node_info->iter_ptr);
- sview_node_info->iter_set = true;
+ GtkTreePath *path = gtk_tree_path_new_first();
+
+ /* get the iter, or find out the list is empty
+ * goto add */
+ if (gtk_tree_model_get_iter(
+ model, &sview_node_info->iter_ptr, path)) {
+ do {
+ /* search for the jobid and
+ check to see if it is in
+ the list */
+ gtk_tree_model_get(
+ model,
+ &sview_node_info->iter_ptr,
+ SORTID_NAME,
+ &name, -1);
+ if (!strcmp(name, node_ptr->name)) {
+ /* update with new info */
+ g_free(name);
+ _update_node_record(
+ sview_node_info,
+ GTK_TREE_STORE(model));
+ sview_node_info->iter_set = 1;
+ break;
+ }
+ g_free(name);
+ } while (gtk_tree_model_iter_next(
+ model,
+ &sview_node_info->iter_ptr));
+ }
+
+ if (!sview_node_info->iter_set) {
+ _append_node_record(sview_node_info,
+ GTK_TREE_STORE(model));
+ sview_node_info->iter_set = true;
+ }
+ gtk_tree_path_free(path);
}
}
list_iterator_destroy(itr);
- gtk_tree_path_free(path);
/* remove all old nodes */
remove_old(model, SORTID_UPDATED);
+ last_model = model;
}
static void _node_info_list_del(void *object)
@@ -461,6 +508,7 @@
if (sview_node_info) {
xfree(sview_node_info->slurmd_start_time);
xfree(sview_node_info->boot_time);
+ xfree(sview_node_info->rack_mp);
xfree(sview_node_info->reason);
xfree(sview_node_info);
}
@@ -512,7 +560,7 @@
GtkTreeModel *model = NULL;
if (cluster_flags & CLUSTER_FLAG_BG)
- temp = "BP NOT FOUND\n";
+ temp = "MIDPLANE NOT FOUND\n";
else
temp = "NODE NOT FOUND\n";
/* only time this will be run so no update */
@@ -584,19 +632,23 @@
/* don't destroy the list from this function */
extern List create_node_info_list(node_info_msg_t *node_info_ptr,
- int changed, bool by_partition)
+ bool by_partition)
{
static List info_list = NULL;
+ static node_info_msg_t *last_node_info_ptr = NULL;
int i = 0;
sview_node_info_t *sview_node_info_ptr = NULL;
node_info_t *node_ptr = NULL;
char user[32], time_str[32];
if (!by_partition) {
- if (!node_info_ptr || (!changed && info_list))
+ if (!node_info_ptr
+ || (info_list && (node_info_ptr == last_node_info_ptr)))
goto update_color;
}
+ last_node_info_ptr = node_info_ptr;
+
if (info_list)
list_flush(info_list);
else
@@ -607,6 +659,7 @@
}
for (i=0; i<node_info_ptr->record_count; i++) {
+ char *select_reason_str = NULL;
node_ptr = &(node_info_ptr->node_array[i]);
if (!node_ptr->name || (node_ptr->name[0] == '\0'))
@@ -625,6 +678,10 @@
sview_node_info_ptr->node_ptr = node_ptr;
sview_node_info_ptr->pos = i;
+ slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
+ SELECT_NODEDATA_RACK_MP,
+ 0, &sview_node_info_ptr->rack_mp);
+
if (node_ptr->reason &&
(node_ptr->reason_uid != NO_VAL) && node_ptr->reason_time) {
struct passwd *pw = NULL;
@@ -638,8 +695,24 @@
time_str, sizeof(time_str));
sview_node_info_ptr->reason = xstrdup_printf(
"%s [%s@%s]", node_ptr->reason, user, time_str);
- } else
+ } else if (node_ptr->reason)
sview_node_info_ptr->reason = xstrdup(node_ptr->reason);
+
+ slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
+ SELECT_NODEDATA_EXTRA_INFO,
+ 0, &select_reason_str);
+ if (select_reason_str && select_reason_str[0]) {
+ if (sview_node_info_ptr->reason)
+ xstrfmtcat(sview_node_info_ptr->reason, "\n%s",
+ select_reason_str);
+ else {
+ sview_node_info_ptr->reason = select_reason_str;
+ select_reason_str = NULL;
+ }
+ }
+ xfree(select_reason_str);
+
+
if (node_ptr->boot_time) {
slurm_make_time_str(&node_ptr->boot_time,
time_str, sizeof(time_str));
@@ -1166,7 +1239,6 @@
GtkTreeView *tree_view = NULL;
static GtkWidget *display_widget = NULL;
List info_list = NULL;
- int changed = 1;
int i = 0, sort_key;
sview_node_info_t *sview_node_info_ptr = NULL;
ListIterator itr = NULL;
@@ -1202,7 +1274,6 @@
== SLURM_NO_CHANGE_IN_DATA) {
if (!display_widget || view == ERROR_VIEW)
goto display_it;
- changed = 0;
} else if (error_code != SLURM_SUCCESS) {
if (view == ERROR_VIEW)
goto end_it;
@@ -1219,7 +1290,7 @@
}
display_it:
- info_list = create_node_info_list(node_info_ptr, changed, FALSE);
+ info_list = create_node_info_list(node_info_ptr, FALSE);
if (!info_list)
goto reset_curs;
i = 0;
@@ -1309,7 +1380,6 @@
List info_list = NULL;
List send_info_list = NULL;
ListIterator itr = NULL;
- int changed = 1;
sview_node_info_t *sview_node_info_ptr = NULL;
node_info_t *node_ptr = NULL;
hostlist_t hostlist = NULL;
@@ -1331,7 +1401,6 @@
== SLURM_NO_CHANGE_IN_DATA) {
if (!spec_info->display_widget || spec_info->view == ERROR_VIEW)
goto display_it;
- changed = 0;
} else if (error_code != SLURM_SUCCESS) {
if (spec_info->view == ERROR_VIEW)
goto end_it;
@@ -1350,7 +1419,7 @@
}
display_it:
- info_list = create_node_info_list(node_info_ptr, changed, FALSE);
+ info_list = create_node_info_list(node_info_ptr, FALSE);
if (!info_list)
return;
@@ -1559,7 +1628,7 @@
char *node;
if (cluster_flags & CLUSTER_FLAG_BG)
- node = "Base partition";
+ node = "Midplane";
else
node = "Node";
@@ -1694,7 +1763,7 @@
if (!strcasecmp("Update Features", type)
|| !strcasecmp("Update Node Features", type)
- || !strcasecmp("Update Base Partition Features",
+ || !strcasecmp("Update Midplane Features",
type)) { /* update features */
update_features_node(GTK_DIALOG(popup), name, old_value);
} else if (!strcasecmp("Update Gres", type)) { /* update gres */
@@ -1709,7 +1778,26 @@
extern void cluster_change_node(void)
{
- display_data_t *display_data = options_data_node;
+ display_data_t *display_data = display_data_node;
+ while (display_data++) {
+ if (display_data->id == -1)
+ break;
+ if (cluster_flags & CLUSTER_FLAG_BG) {
+ switch(display_data->id) {
+ case SORTID_RACK_MP:
+ display_data->name = "RackMidplane";
+ break;
+ }
+ } else {
+ switch(display_data->id) {
+ case SORTID_RACK_MP:
+ display_data->name = NULL;
+ break;
+ }
+ }
+ }
+
+ display_data = options_data_node;
while (display_data++) {
if (display_data->id == -1)
break;
@@ -1722,14 +1810,14 @@
if (!display_data->name) {
} else if (!strcmp(display_data->name, "Drain Node"))
- display_data->name = "Drain Base Partition";
+ display_data->name = "Drain Midplane";
else if (!strcmp(display_data->name, "Resume Node"))
- display_data->name = "Resume Base Partition";
+ display_data->name = "Resume Midplane";
else if (!strcmp(display_data->name, "Put Node Down"))
- display_data->name = "Put Base Partition Down";
+ display_data->name = "Put Midplane Down";
else if (!strcmp(display_data->name, "Make Node Idle"))
display_data->name =
- "Make Base Partition Idle";
+ "Make Midplane Idle";
} else {
switch(display_data->id) {
case BLOCK_PAGE:
@@ -1739,17 +1827,18 @@
if (!display_data->name) {
} else if (!strcmp(display_data->name,
- "Drain Base Partitions"))
+ "Drain Midplanes"))
display_data->name = "Drain Nodes";
else if (!strcmp(display_data->name,
- "Resume Base Partitions"))
+ "Resume Midplanes"))
display_data->name = "Resume Nodes";
else if (!strcmp(display_data->name,
- "Put Base Partitions Down"))
+ "Put Midplanes Down"))
display_data->name = "Put Nodes Down";
else if (!strcmp(display_data->name,
- "Make Base Partitions Idle"))
+ "Make Midplanes Idle"))
display_data->name = "Make Nodes Idle";
}
}
+ get_info_node(NULL, NULL);
}
diff --git a/src/sview/part_info.c b/src/sview/part_info.c
index 90136ec..47f5917 100644
--- a/src/sview/part_info.c
+++ b/src/sview/part_info.c
@@ -55,10 +55,14 @@
/* Collection of data for printing reports. Like data is combined here */
typedef struct {
int color_inx;
+ GtkTreeIter iter_ptr;
+ bool iter_set;
/* part_info contains partition, avail, max_time, job_size,
* root, share, groups */
partition_info_t* part_ptr;
+ int pos;
List sub_list;
+ sview_part_sub_t sub_part_total;
} sview_part_info_t;
enum {
@@ -71,10 +75,6 @@
enum {
SORTID_POS = POS_LOC,
SORTID_ALTERNATE,
-#ifdef HAVE_BG
- SORTID_NODELIST,
- SORTID_NODES_ALLOWED,
-#endif
SORTID_COLOR,
SORTID_COLOR_INX,
SORTID_CPUS,
@@ -85,6 +85,10 @@
SORTID_HIDDEN,
SORTID_JOB_SIZE,
SORTID_MEM,
+#ifdef HAVE_BG
+ SORTID_NODELIST,
+ SORTID_NODES_ALLOWED,
+#endif
SORTID_NAME,
#ifndef HAVE_BG
SORTID_NODELIST,
@@ -166,7 +170,7 @@
{G_TYPE_STRING, SORTID_REASON, "Reason", FALSE,
EDIT_NONE, refresh_part, create_model_part, admin_edit_part},
#ifdef HAVE_BG
- {G_TYPE_STRING, SORTID_NODELIST, "BP List", FALSE,
+ {G_TYPE_STRING, SORTID_NODELIST, "MidplaneList", FALSE,
EDIT_TEXTBOX, refresh_part, create_model_part, admin_edit_part},
#else
{G_TYPE_STRING, SORTID_NODELIST, "NodeList", FALSE,
@@ -222,7 +226,7 @@
{G_TYPE_STRING, SORTID_REASON, "Reason", FALSE,
EDIT_TEXTBOX, refresh_part, _create_model_part2, admin_edit_part},
#ifdef HAVE_BG
- {G_TYPE_STRING, SORTID_NODELIST, "BP List", FALSE,
+ {G_TYPE_STRING, SORTID_NODELIST, "MidplaneList", FALSE,
EDIT_TEXTBOX, refresh_part, _create_model_part2, admin_edit_part},
#else
{G_TYPE_STRING, SORTID_NODELIST, "NodeList", FALSE,
@@ -237,15 +241,15 @@
{G_TYPE_STRING, PART_PAGE, "Edit Partition", TRUE, ADMIN_PAGE},
{G_TYPE_STRING, PART_PAGE, "Remove Partition", TRUE, ADMIN_PAGE},
#ifdef HAVE_BG
- {G_TYPE_STRING, PART_PAGE, "Drain Base Partitions",
+ {G_TYPE_STRING, PART_PAGE, "Drain Midplanes",
TRUE, ADMIN_PAGE | EXTRA_NODES},
- {G_TYPE_STRING, PART_PAGE, "Resume Base Partitions",
+ {G_TYPE_STRING, PART_PAGE, "Resume Midplanes",
TRUE, ADMIN_PAGE | EXTRA_NODES},
- {G_TYPE_STRING, PART_PAGE, "Put Base Partitions Down",
+ {G_TYPE_STRING, PART_PAGE, "Put Midplanes Down",
TRUE, ADMIN_PAGE | EXTRA_NODES},
- {G_TYPE_STRING, PART_PAGE, "Make Base Partitions Idle",
+ {G_TYPE_STRING, PART_PAGE, "Make Midplanes Idle",
TRUE, ADMIN_PAGE | EXTRA_NODES},
- {G_TYPE_STRING, PART_PAGE, "Update Base Partition Features",
+ {G_TYPE_STRING, PART_PAGE, "Update Midplane Features",
TRUE, ADMIN_PAGE | EXTRA_NODES},
#else
{G_TYPE_STRING, PART_PAGE, "Drain Nodes",
@@ -264,7 +268,7 @@
{G_TYPE_STRING, JOB_PAGE, "Jobs", TRUE, PART_PAGE},
#ifdef HAVE_BG
{G_TYPE_STRING, BLOCK_PAGE, "Blocks", TRUE, PART_PAGE},
- {G_TYPE_STRING, NODE_PAGE, "Base Partitions", TRUE, PART_PAGE},
+ {G_TYPE_STRING, NODE_PAGE, "Midplanes", TRUE, PART_PAGE},
#else
{G_TYPE_STRING, BLOCK_PAGE, NULL, TRUE, PART_PAGE},
{G_TYPE_STRING, NODE_PAGE, "Nodes", TRUE, PART_PAGE},
@@ -829,66 +833,30 @@
int update)
{
GtkTreeIter iter;
- ListIterator itr = NULL;
char time_buf[20], tmp_buf[20];
char tmp_cnt[8];
char tmp_cnt1[8];
char tmp_cnt2[8];
partition_info_t *part_ptr = sview_part_info->part_ptr;
sview_part_sub_t *sview_part_sub = NULL;
- sview_part_sub_t *temp_part_sub = NULL;
- sview_part_sub_t alloc_part_sub;
- sview_part_sub_t idle_part_sub;
- sview_part_sub_t other_part_sub;
char ind_cnt[1024];
char *temp_char = NULL;
uint16_t temp_uint16 = 0;
- int global_set = 0, i;
+ int i;
int yes_no = -1;
int up_down = -1;
uint32_t limit_set = NO_VAL;
GtkTreeStore *treestore =
GTK_TREE_STORE(gtk_tree_view_get_model(treeview));
- memset(&alloc_part_sub, 0, sizeof(sview_part_sub_t));
- memset(&idle_part_sub, 0, sizeof(sview_part_sub_t));
- memset(&other_part_sub, 0, sizeof(sview_part_sub_t));
-
- itr = list_iterator_create(sview_part_info->sub_list);
- while ((sview_part_sub = list_next(itr))) {
- if (sview_part_sub->node_state == NODE_STATE_IDLE)
- temp_part_sub = &idle_part_sub;
- else if (sview_part_sub->node_state == NODE_STATE_ALLOCATED)
- temp_part_sub = &alloc_part_sub;
- else
- temp_part_sub = &other_part_sub;
- temp_part_sub->node_cnt += sview_part_sub->node_cnt;
- temp_part_sub->cpu_alloc_cnt += sview_part_sub->cpu_alloc_cnt;
- temp_part_sub->cpu_error_cnt += sview_part_sub->cpu_error_cnt;
- temp_part_sub->cpu_idle_cnt += sview_part_sub->cpu_idle_cnt;
- /* temp_part_sub->disk_total += sview_part_sub->disk_total; */
-/* temp_part_sub->mem_total += sview_part_sub->mem_total; */
-
- if (!global_set) {
- global_set = 1;
- /* store features and reasons in the others
- group */
- other_part_sub.features = sview_part_sub->features;
- other_part_sub.reason = sview_part_sub->reason;
- other_part_sub.disk_total = sview_part_sub->disk_total;
- other_part_sub.mem_total = sview_part_sub->mem_total;
- } else {
- other_part_sub.disk_total += sview_part_sub->disk_total;
- other_part_sub.mem_total += sview_part_sub->mem_total;
- }
- }
- list_iterator_destroy(itr);
-
- convert_num_unit((float)alloc_part_sub.node_cnt,
+ convert_num_unit((float)sview_part_info->sub_part_total.cpu_alloc_cnt
+ / cpus_per_node,
tmp_cnt, sizeof(tmp_cnt), UNIT_NONE);
- convert_num_unit((float)idle_part_sub.node_cnt,
+ convert_num_unit((float)sview_part_info->sub_part_total.cpu_idle_cnt
+ / cpus_per_node,
tmp_cnt1, sizeof(tmp_cnt1), UNIT_NONE);
- convert_num_unit((float)other_part_sub.node_cnt,
+ convert_num_unit((float)sview_part_info->sub_part_total.cpu_error_cnt
+ / cpus_per_node,
tmp_cnt2, sizeof(tmp_cnt2), UNIT_NONE);
snprintf(ind_cnt, sizeof(ind_cnt), "%s/%s/%s",
tmp_cnt, tmp_cnt1, tmp_cnt2);
@@ -960,7 +928,8 @@
temp_char = time_buf;
break;
case SORTID_MEM:
- convert_num_unit((float)other_part_sub.mem_total,
+ convert_num_unit((float)sview_part_info->
+ sub_part_total.mem_total,
tmp_cnt, sizeof(tmp_cnt),
UNIT_MEGA);
temp_char = tmp_cnt;
@@ -1031,7 +1000,8 @@
break;
case SORTID_TMP_DISK:
convert_num_unit(
- (float)other_part_sub.disk_total,
+ (float)sview_part_info->sub_part_total.
+ disk_total,
time_buf, sizeof(time_buf), UNIT_NONE);
temp_char = time_buf;
break;
@@ -1084,8 +1054,7 @@
}
static void _update_part_record(sview_part_info_t *sview_part_info,
- GtkTreeStore *treestore,
- GtkTreeIter *iter)
+ GtkTreeStore *treestore)
{
char tmp_prio[40], tmp_size[40], tmp_share_buf[40], tmp_time[40];
char tmp_max_nodes[40], tmp_min_nodes[40], tmp_grace[40];
@@ -1200,7 +1169,7 @@
/* Combining these records provides a slight performance improvement
* NOTE: Some of these fields are cleared here and filled in based upon
* the configuration of nodes within this partition. */
- gtk_tree_store_set(treestore, iter,
+ gtk_tree_store_set(treestore, &sview_part_info->iter_ptr,
SORTID_ALTERNATE, tmp_alt,
SORTID_COLOR,
sview_colors[sview_part_info->color_inx],
@@ -1235,12 +1204,15 @@
-1);
if (gtk_tree_model_iter_children(GTK_TREE_MODEL(treestore),
- &sub_iter, iter))
+ &sub_iter,
+ &sview_part_info->iter_ptr))
_subdivide_part(sview_part_info,
- GTK_TREE_MODEL(treestore), &sub_iter, iter);
+ GTK_TREE_MODEL(treestore), &sub_iter,
+ &sview_part_info->iter_ptr);
else
_subdivide_part(sview_part_info,
- GTK_TREE_MODEL(treestore), NULL, iter);
+ GTK_TREE_MODEL(treestore), NULL,
+ &sview_part_info->iter_ptr);
return;
}
@@ -1351,12 +1323,12 @@
}
static void _append_part_record(sview_part_info_t *sview_part_info,
- GtkTreeStore *treestore, GtkTreeIter *iter,
- int line)
+ GtkTreeStore *treestore)
{
- gtk_tree_store_append(treestore, iter, NULL);
- gtk_tree_store_set(treestore, iter, SORTID_POS, line, -1);
- _update_part_record(sview_part_info, treestore, iter);
+ gtk_tree_store_append(treestore, &sview_part_info->iter_ptr, NULL);
+ gtk_tree_store_set(treestore, &sview_part_info->iter_ptr,
+ SORTID_POS, sview_part_info->pos, -1);
+ _update_part_record(sview_part_info, treestore);
}
static void _append_part_sub_record(sview_part_sub_t *sview_part_sub,
@@ -1373,68 +1345,76 @@
static void _update_info_part(List info_list,
GtkTreeView *tree_view)
{
- GtkTreePath *path = gtk_tree_path_new_first();
GtkTreeModel *model = gtk_tree_view_get_model(tree_view);
- GtkTreeIter iter;
+ static GtkTreeModel *last_model = NULL;
partition_info_t *part_ptr = NULL;
- int line = 0;
- char *host = NULL, *part_name = NULL;
+ char *name = NULL;
ListIterator itr = NULL;
sview_part_info_t *sview_part_info = NULL;
- /* get the iter, or find out the list is empty goto add */
- if (gtk_tree_model_get_iter(model, &iter, path)) {
- /* make sure all the partitions are still here */
- while (1) {
- gtk_tree_store_set(GTK_TREE_STORE(model), &iter,
- SORTID_UPDATED, 0, -1);
- if (!gtk_tree_model_iter_next(model, &iter)) {
- break;
- }
- }
- }
+ set_for_update(model, SORTID_UPDATED);
itr = list_iterator_create(info_list);
while ((sview_part_info = (sview_part_info_t*) list_next(itr))) {
part_ptr = sview_part_info->part_ptr;
- /* get the iter, or find out the list is empty goto add */
- if (!gtk_tree_model_get_iter(model, &iter, path)) {
- goto adding;
- }
- line = 0;
- while (1) {
- /* search for the jobid and check to see if
- it is in the list */
- gtk_tree_model_get(model, &iter, SORTID_NAME,
- &part_name, -1);
- if (!strcmp(part_name, part_ptr->name)) {
- /* update with new info */
- g_free(part_name);
- _update_part_record(sview_part_info,
- GTK_TREE_STORE(model),
- &iter);
- goto found;
- }
- g_free(part_name);
+ /* This means the tree_store changed (added new column
+ or something). */
+ if (last_model != model)
+ sview_part_info->iter_set = false;
- line++;
- if (!gtk_tree_model_iter_next(model, &iter)) {
- break;
- }
+ if (sview_part_info->iter_set) {
+ gtk_tree_model_get(model, &sview_part_info->iter_ptr,
+ SORTID_NAME, &name, -1);
+ if (strcmp(name, part_ptr->name)) /* Bad pointer */
+ sview_part_info->iter_set = false;
+ g_free(name);
}
- adding:
- _append_part_record(sview_part_info, GTK_TREE_STORE(model),
- &iter, line);
- found:
- ;
+ if (sview_part_info->iter_set)
+ _update_part_record(sview_part_info,
+ GTK_TREE_STORE(model));
+ else {
+ GtkTreePath *path = gtk_tree_path_new_first();
+
+ /* get the iter, or find out the list is empty
+ * goto add */
+ if (gtk_tree_model_get_iter(
+ model, &sview_part_info->iter_ptr, path)) {
+ do {
+ /* search for the jobid and
+ check to see if it is in
+ the list */
+ gtk_tree_model_get(
+ model,
+ &sview_part_info->iter_ptr,
+ SORTID_NAME,
+ &name, -1);
+ if (!strcmp(name, part_ptr->name)) {
+ /* update with new info */
+ g_free(name);
+ _update_part_record(
+ sview_part_info,
+ GTK_TREE_STORE(model));
+ sview_part_info->iter_set = 1;
+ break;
+ }
+ g_free(name);
+ } while (gtk_tree_model_iter_next(
+ model,
+ &sview_part_info->iter_ptr));
+ }
+
+ if (!sview_part_info->iter_set) {
+ _append_part_record(sview_part_info,
+ GTK_TREE_STORE(model));
+ sview_part_info->iter_set = true;
+ }
+ gtk_tree_path_free(path);
+ }
}
list_iterator_destroy(itr);
- if (host)
- free(host);
-
- gtk_tree_path_free(path);
/* remove all old partitions */
remove_old(model, SORTID_UPDATED);
+ last_model = model;
return;
}
@@ -1514,6 +1494,12 @@
err_cpus *= cpus_per_node;
idle_cpus -= err_cpus;
+ } else if (sview_part_sub->node_state == NODE_STATE_ALLOCATED) {
+ alloc_cpus = idle_cpus;
+ idle_cpus = 0;
+ } else if (sview_part_sub->node_state != NODE_STATE_IDLE) {
+ err_cpus = idle_cpus;
+ idle_cpus = 0;
}
sview_part_sub->cpu_alloc_cnt += alloc_cpus;
@@ -1637,18 +1623,24 @@
}
static List _create_part_info_list(partition_info_msg_t *part_info_ptr,
- node_info_msg_t *node_info_ptr,
- int changed)
+ node_info_msg_t *node_info_ptr)
{
sview_part_info_t *sview_part_info = NULL;
partition_info_t *part_ptr = NULL;
+ static node_info_msg_t *last_node_info_ptr = NULL;
+ static partition_info_msg_t *last_part_info_ptr = NULL;
node_info_t *node_ptr = NULL;
static List info_list = NULL;
int i, j2;
+ sview_part_sub_t *sview_part_sub = NULL;
+ ListIterator itr;
- if (!changed && info_list) {
+ if (info_list && (node_info_ptr == last_node_info_ptr)
+ && (part_info_ptr == last_part_info_ptr))
return info_list;
- }
+
+ last_node_info_ptr = node_info_ptr;
+ last_part_info_ptr = part_info_ptr;
if (info_list)
list_flush(info_list);
@@ -1667,6 +1659,7 @@
part_ptr->flags & PART_FLAG_HIDDEN)
continue;
sview_part_info = _create_sview_part_info(part_ptr);
+ sview_part_info->pos = i;
list_append(info_list, sview_part_info);
sview_part_info->color_inx = i % sview_colors_cnt;
@@ -1687,6 +1680,35 @@
}
list_sort(sview_part_info->sub_list,
(ListCmpF)_sview_sub_part_sort);
+
+ /* Need to do this after the fact so we deal with
+ complete sub parts.
+ */
+ itr = list_iterator_create(sview_part_info->sub_list);
+ while ((sview_part_sub = list_next(itr))) {
+ sview_part_info->sub_part_total.node_cnt +=
+ sview_part_sub->node_cnt;
+ sview_part_info->sub_part_total.cpu_alloc_cnt +=
+ sview_part_sub->cpu_alloc_cnt;
+ sview_part_info->sub_part_total.cpu_error_cnt +=
+ sview_part_sub->cpu_error_cnt;
+ sview_part_info->sub_part_total.cpu_idle_cnt +=
+ sview_part_sub->cpu_idle_cnt;
+ sview_part_info->sub_part_total.disk_total +=
+ sview_part_sub->disk_total;
+ sview_part_info->sub_part_total.mem_total +=
+ sview_part_sub->mem_total;
+ if (!sview_part_info->sub_part_total.features) {
+ /* store features and reasons
+ in the others group */
+ sview_part_info->sub_part_total.features
+ = sview_part_sub->features;
+ sview_part_info->sub_part_total.reason
+ = sview_part_sub->reason;
+ }
+ hostlist_sort(sview_part_sub->hl);
+ }
+ list_iterator_destroy(itr);
}
list_sort(info_list, (ListCmpF)_sview_part_sort_aval_dec);
@@ -2239,7 +2261,6 @@
GtkTreeView *tree_view = NULL;
static GtkWidget *display_widget = NULL;
List info_list = NULL;
- int changed = 1;
int j, k;
sview_part_info_t *sview_part_info = NULL;
partition_info_t *part_ptr = NULL;
@@ -2297,7 +2318,6 @@
if ((!display_widget || view == ERROR_VIEW)
|| (part_error_code != SLURM_NO_CHANGE_IN_DATA))
goto display_it;
- changed = 0;
} else if (node_error_code != SLURM_SUCCESS) {
if (view == ERROR_VIEW)
goto end_it;
@@ -2315,9 +2335,7 @@
display_it:
- info_list = _create_part_info_list(part_info_ptr,
- node_info_ptr,
- changed);
+ info_list = _create_part_info_list(part_info_ptr, node_info_ptr);
if (!info_list)
goto reset_curs;
@@ -2406,7 +2424,6 @@
GtkTreeView *tree_view = NULL;
List info_list = NULL;
List send_info_list = NULL;
- int changed = 1;
int j=0, i=-1;
sview_part_info_t *sview_part_info_ptr = NULL;
partition_info_t *part_ptr = NULL;
@@ -2450,7 +2467,6 @@
|| spec_info->view == ERROR_VIEW)
|| (part_error_code != SLURM_NO_CHANGE_IN_DATA))
goto display_it;
- changed = 0;
} else if (node_error_code != SLURM_SUCCESS) {
if (spec_info->view == ERROR_VIEW)
goto end_it;
@@ -2468,9 +2484,8 @@
display_it:
- info_list = _create_part_info_list(part_info_ptr,
- node_info_ptr,
- changed);
+ info_list = _create_part_info_list(part_info_ptr, node_info_ptr);
+
if (!info_list)
return;
@@ -2661,11 +2676,11 @@
if (cluster_flags & CLUSTER_FLAG_BG) {
if (!state || !strlen(state))
snprintf(title, 100,
- "Base partition(s) in partition %s",
+ "Midplane(s) in partition %s",
name);
else
snprintf(title, 100,
- "Base partition(s) in partition %s "
+ "Midplane(s) in partition %s "
"that are in '%s' state",
name, state);
} else {
@@ -2965,7 +2980,7 @@
if (cluster_flags & CLUSTER_FLAG_BG) {
switch(display_data->id) {
case SORTID_NODELIST:
- display_data->name = "BP List";
+ display_data->name = "MidplaneList";
break;
default:
break;
@@ -2991,24 +3006,24 @@
display_data->name = "Blocks";
break;
case NODE_PAGE:
- display_data->name = "Base Partitions";
+ display_data->name = "Midplanes";
break;
}
if (!display_data->name) {
} else if (!strcmp(display_data->name, "Drain Nodes"))
- display_data->name = "Drain Base Partitions";
+ display_data->name = "Drain Midplanes";
else if (!strcmp(display_data->name, "Resume Nodes"))
- display_data->name = "Resume Base Partitions";
+ display_data->name = "Resume Midplanes";
else if (!strcmp(display_data->name, "Put Nodes Down"))
- display_data->name = "Put Base Partitions Down";
+ display_data->name = "Put Midplanes Down";
else if (!strcmp(display_data->name, "Make Nodes Idle"))
display_data->name =
- "Make Base Partitions Idle";
+ "Make Midplanes Idle";
else if (!strcmp(display_data->name,
"Update Node Features"))
display_data->name =
- "Update Base Partitions Features";
+ "Update Midplanes Features";
} else {
switch(display_data->id) {
case BLOCK_PAGE:
@@ -3021,21 +3036,21 @@
if (!display_data->name) {
} else if (!strcmp(display_data->name,
- "Drain Base Partitions"))
+ "Drain Midplanes"))
display_data->name = "Drain Nodes";
else if (!strcmp(display_data->name,
- "Resume Base Partitions"))
+ "Resume Midplanes"))
display_data->name = "Resume Nodes";
else if (!strcmp(display_data->name,
- "Put Base Partitions Down"))
+ "Put Midplanes Down"))
display_data->name = "Put Nodes Down";
else if (!strcmp(display_data->name,
- "Make Base Partitions Idle"))
+ "Make Midplanes Idle"))
display_data->name = "Make Nodes Idle";
else if (!strcmp(display_data->name,
"Update Node Features"))
display_data->name =
- "Update Base Partitions Features";
+ "Update Midplanes Features";
}
}
get_info_part(NULL, NULL);
diff --git a/src/sview/popups.c b/src/sview/popups.c
index 18e4867..4d522ab 100644
--- a/src/sview/popups.c
+++ b/src/sview/popups.c
@@ -72,7 +72,7 @@
char *type;
if (cluster_flags & CLUSTER_FLAG_BG)
- type = "Base partition";
+ type = "Midplane";
else
type = "Node";
@@ -544,13 +544,14 @@
"Reservation creation specifications\n\n"
"Specify Time_Start and either Duration or Time_End.\n"
#ifdef HAVE_BG
- "Specify either Node_Count or BP_List.\n"
+ "Specify either Node_Count or Midplane_List.\n"
#else
"Specify either Node_Count or Node_List.\n"
#endif
"Specify either Accounts or Users.\n\n"
"Supported Flags include: Maintenance, Overlap,\n"
- "Ignore_Jobs, Daily and Weekly.\n"
+ "Ignore_Jobs, Daily and Weekly, License_Only\n"
+ "and Static_Alloc.\n"
"All other fields are optional.");
resv_msg = xmalloc(sizeof(resv_desc_msg_t));
slurm_init_resv_desc_msg(resv_msg);
@@ -712,7 +713,7 @@
sview_search_info.search_type = SEARCH_NODE_NAME;
entry = create_entry();
if (cluster_flags & CLUSTER_FLAG_BG)
- label = gtk_label_new("Which base partition(s)?\n"
+ label = gtk_label_new("Which Midplane(s)?\n"
"(ranged or comma separated)");
else
label = gtk_label_new("Which node(s)?\n"
@@ -992,6 +993,8 @@
working_sview_config.grid_vert =
gtk_spin_button_get_value_as_int(
GTK_SPIN_BUTTON(vert_sb));
+ memcpy(&default_sview_config, &working_sview_config,
+ sizeof(sview_config_t));
if ((width == working_sview_config.grid_x_width)
&& (hori == working_sview_config.grid_hori)
&& (vert == working_sview_config.grid_vert)) {
diff --git a/src/sview/resv_info.c b/src/sview/resv_info.c
index 39772c1..f476b43 100644
--- a/src/sview/resv_info.c
+++ b/src/sview/resv_info.c
@@ -35,6 +35,9 @@
/* Collection of data for printing reports. Like data is combined here */
typedef struct {
int color_inx;
+ GtkTreeIter iter_ptr;
+ bool iter_set;
+ int pos;
reserve_info_t *resv_ptr;
} sview_resv_info_t;
@@ -90,7 +93,7 @@
refresh_resv, create_model_resv, admin_edit_resv},
{G_TYPE_STRING, SORTID_NODELIST,
#ifdef HAVE_BG
- "BP List",
+ "MidplaneList",
#else
"Node List",
#endif
@@ -111,7 +114,7 @@
refresh_resv, create_model_resv, admin_edit_resv},
{G_TYPE_STRING, SORTID_FEATURES, "Features", FALSE, EDIT_TEXTBOX,
refresh_resv, create_model_resv, admin_edit_resv},
- {G_TYPE_STRING, SORTID_FLAGS, "Flags", FALSE, EDIT_NONE,
+ {G_TYPE_STRING, SORTID_FLAGS, "Flags", FALSE, EDIT_TEXTBOX,
refresh_resv, create_model_resv, admin_edit_resv},
{G_TYPE_POINTER, SORTID_NODE_INX, NULL, FALSE, EDIT_NONE,
refresh_resv, create_model_resv, admin_edit_resv},
@@ -131,7 +134,7 @@
refresh_resv, create_model_resv, admin_edit_resv},
{G_TYPE_STRING, SORTID_NODELIST,
#ifdef HAVE_BG
- "BP_List",
+ "Midplane_List",
#else
"Node_List",
#endif
@@ -166,7 +169,7 @@
{G_TYPE_STRING, PART_PAGE, "Partitions", TRUE, RESV_PAGE},
#ifdef HAVE_BG
{G_TYPE_STRING, BLOCK_PAGE, "Blocks", TRUE, RESV_PAGE},
- {G_TYPE_STRING, NODE_PAGE, "Base Partitions", TRUE, RESV_PAGE},
+ {G_TYPE_STRING, NODE_PAGE, "Midplanes", TRUE, RESV_PAGE},
#else
{G_TYPE_STRING, BLOCK_PAGE, NULL, TRUE, RESV_PAGE},
{G_TYPE_STRING, NODE_PAGE, "Nodes", TRUE, RESV_PAGE},
@@ -243,6 +246,13 @@
outflags |= RESERVE_FLAG_NO_LIC_ONLY;
else
outflags |= RESERVE_FLAG_LIC_ONLY;
+ } else if (strncasecmp(curr, "Static_Alloc", MAX(taglen,1))
+ == 0) {
+ curr += taglen;
+ if (flip)
+ outflags |= RESERVE_FLAG_NO_STATIC;
+ else
+ outflags |= RESERVE_FLAG_STATIC;
} else {
char *temp = g_strdup_printf("Error parsing flags %s.",
flagstr);
@@ -293,7 +303,8 @@
int column)
{
char *type = "", *temp_str;
- int temp_int = 0;
+ char *tmp_text, *last = NULL, *tok;
+ int block_inx, temp_int = 0;
uint32_t f;
/* need to clear global_edit_error here (just in case) */
@@ -345,16 +356,27 @@
type = "name";
break;
case SORTID_NODE_CNT:
- temp_int = strtol(new_text, &temp_str, 10);
- if ((temp_str[0] == 'k') || (temp_str[0] == 'k'))
- temp_int *= 1024;
- if ((temp_str[0] == 'm') || (temp_str[0] == 'm'))
- temp_int *= (1024 * 1024);
-
type = "Node Count";
- if (temp_int <= 0)
- goto return_error;
- resv_msg->node_cnt = temp_int;
+ block_inx = 0;
+ tmp_text = xstrdup(new_text);
+ tok = strtok_r(tmp_text, ",", &last);
+ while (tok) {
+ temp_int = strtol(tok, &temp_str, 10);
+ if ((temp_str[0] == 'k') || (temp_str[0] == 'k'))
+ temp_int *= 1024;
+ if ((temp_str[0] == 'm') || (temp_str[0] == 'm'))
+ temp_int *= (1024 * 1024);
+ xrealloc(resv_msg->node_cnt,
+ (sizeof(uint32_t) * (block_inx + 2)));
+ resv_msg->node_cnt[block_inx++] = temp_int;
+ if (temp_int <= 0) {
+ xfree(tmp_text);
+ xfree(resv_msg->node_cnt);
+ goto return_error;
+ }
+ tok = strtok_r(NULL, ",", &last);
+ }
+ xfree(tmp_text);
break;
case SORTID_NODELIST:
resv_msg->node_list = xstrdup(new_text);
@@ -537,6 +559,7 @@
SORTID_LICENSES),
resv_ptr->licenses);
+ /* NOTE: node_cnt in reservation info from slurmctld ONE number */
convert_num_unit((float)resv_ptr->node_cnt,
time_buf, sizeof(time_buf), UNIT_NONE);
add_display_treestore_line(update, treestore, &iter,
@@ -574,8 +597,7 @@
}
static void _update_resv_record(sview_resv_info_t *sview_resv_info_ptr,
- GtkTreeStore *treestore,
- GtkTreeIter *iter)
+ GtkTreeStore *treestore)
{
char tmp_duration[40], tmp_end[40], tmp_nodes[40], tmp_start[40];
char *tmp_flags;
@@ -597,7 +619,7 @@
sizeof(tmp_start));
/* Combining these records provides a slight performance improvement */
- gtk_tree_store_set(treestore, iter,
+ gtk_tree_store_set(treestore, &sview_resv_info_ptr->iter_ptr,
SORTID_ACCOUNTS, resv_ptr->accounts,
SORTID_COLOR,
sview_colors[sview_resv_info_ptr->color_inx],
@@ -614,7 +636,7 @@
SORTID_TIME_START, tmp_start,
SORTID_TIME_END, tmp_end,
SORTID_UPDATED, 1,
- SORTID_USERS, resv_ptr->users,
+ SORTID_USERS, resv_ptr->users,
-1);
xfree(tmp_flags);
@@ -623,80 +645,91 @@
}
static void _append_resv_record(sview_resv_info_t *sview_resv_info_ptr,
- GtkTreeStore *treestore, GtkTreeIter *iter,
- int line)
+ GtkTreeStore *treestore)
{
- gtk_tree_store_append(treestore, iter, NULL);
- gtk_tree_store_set(treestore, iter, SORTID_POS, line, -1);
- _update_resv_record(sview_resv_info_ptr, treestore, iter);
+ gtk_tree_store_append(treestore, &sview_resv_info_ptr->iter_ptr, NULL);
+ gtk_tree_store_set(treestore, &sview_resv_info_ptr->iter_ptr,
+ SORTID_POS, sview_resv_info_ptr->pos, -1);
+ _update_resv_record(sview_resv_info_ptr, treestore);
}
static void _update_info_resv(List info_list,
GtkTreeView *tree_view)
{
- GtkTreePath *path = gtk_tree_path_new_first();
GtkTreeModel *model = gtk_tree_view_get_model(tree_view);
- GtkTreeIter iter;
+ static GtkTreeModel *last_model = NULL;
reserve_info_t *resv_ptr = NULL;
- int line = 0;
- char *host = NULL, *resv_name = NULL;
+ char *name = NULL;
ListIterator itr = NULL;
sview_resv_info_t *sview_resv_info = NULL;
- /* get the iter, or find out the list is empty goto add */
- if (gtk_tree_model_get_iter(model, &iter, path)) {
- /* make sure all the reserves are still here */
- while (1) {
- gtk_tree_store_set(GTK_TREE_STORE(model), &iter,
- SORTID_UPDATED, 0, -1);
- if (!gtk_tree_model_iter_next(model, &iter)) {
- break;
- }
- }
- }
+ set_for_update(model, SORTID_UPDATED);
itr = list_iterator_create(info_list);
while ((sview_resv_info = (sview_resv_info_t*) list_next(itr))) {
resv_ptr = sview_resv_info->resv_ptr;
- /* get the iter, or find out the list is empty goto add */
- if (!gtk_tree_model_get_iter(model, &iter, path)) {
- goto adding;
- }
- line = 0;
- while (1) {
- /* search for the jobid and check to see if
- it is in the list */
- gtk_tree_model_get(model, &iter, SORTID_NAME,
- &resv_name, -1);
- if (!strcmp(resv_name, resv_ptr->name)) {
- /* update with new info */
- g_free(resv_name);
- _update_resv_record(sview_resv_info,
- GTK_TREE_STORE(model),
- &iter);
- goto found;
- }
- g_free(resv_name);
- line++;
- if (!gtk_tree_model_iter_next(model, &iter)) {
- break;
+ /* This means the tree_store changed (added new column
+ or something). */
+ if (last_model != model)
+ sview_resv_info->iter_set = false;
+
+ if (sview_resv_info->iter_set) {
+ gtk_tree_model_get(model, &sview_resv_info->iter_ptr,
+ SORTID_NAME, &name, -1);
+ if (strcmp(name, resv_ptr->name)) { /* Bad pointer */
+ sview_resv_info->iter_set = false;
+ //g_print("bad resv iter pointer\n");
}
+ g_free(name);
}
- adding:
- _append_resv_record(sview_resv_info, GTK_TREE_STORE(model),
- &iter, line);
- found:
- ;
+ if (sview_resv_info->iter_set) {
+ _update_resv_record(sview_resv_info,
+ GTK_TREE_STORE(model));
+ } else {
+ GtkTreePath *path = gtk_tree_path_new_first();
+
+ /* get the iter, or find out the list is empty
+ * goto add */
+ if (gtk_tree_model_get_iter(
+ model, &sview_resv_info->iter_ptr, path)) {
+ do {
+ /* search for the jobid and
+ check to see if it is in
+ the list */
+ gtk_tree_model_get(
+ model,
+ &sview_resv_info->iter_ptr,
+ SORTID_NAME,
+ &name, -1);
+ if (!strcmp(name, resv_ptr->name)) {
+ /* update with new info */
+ g_free(name);
+ _update_resv_record(
+ sview_resv_info,
+ GTK_TREE_STORE(model));
+ sview_resv_info->iter_set = 1;
+ break;
+ }
+ g_free(name);
+ } while (gtk_tree_model_iter_next(
+ model,
+ &sview_resv_info->iter_ptr));
+ }
+
+ if (!sview_resv_info->iter_set) {
+ _append_resv_record(sview_resv_info,
+ GTK_TREE_STORE(model));
+ sview_resv_info->iter_set = true;
+ }
+ gtk_tree_path_free(path);
+ }
}
list_iterator_destroy(itr);
- if (host)
- free(host);
- gtk_tree_path_free(path);
/* remove all old reservations */
remove_old(model, SORTID_UPDATED);
- return;
+ last_model = model;
}
static int _sview_resv_sort_aval_dec(sview_resv_info_t* rec_a,
@@ -721,17 +754,19 @@
return 0;
}
-static List _create_resv_info_list(reserve_info_msg_t *resv_info_ptr,
- int changed)
+static List _create_resv_info_list(reserve_info_msg_t *resv_info_ptr)
{
static List info_list = NULL;
int i = 0;
+ static reserve_info_msg_t *last_resv_info_ptr = NULL;
sview_resv_info_t *sview_resv_info_ptr = NULL;
reserve_info_t *resv_ptr = NULL;
- if (!changed && info_list)
+ if (info_list && (resv_info_ptr == last_resv_info_ptr))
goto update_color;
+ last_resv_info_ptr = resv_info_ptr;
+
if (info_list)
list_flush(info_list);
else
@@ -746,6 +781,7 @@
resv_ptr = &(resv_info_ptr->reservation_array[i]);
sview_resv_info_ptr = xmalloc(sizeof(sview_resv_info_t));
+ sview_resv_info_ptr->pos = i;
sview_resv_info_ptr->resv_ptr = resv_ptr;
sview_resv_info_ptr->color_inx = i % sview_colors_cnt;
list_append(info_list, sview_resv_info_ptr);
@@ -1049,7 +1085,6 @@
GtkTreeView *tree_view = NULL;
static GtkWidget *display_widget = NULL;
int j=0;
- int changed = 1;
ListIterator itr = NULL;
sview_resv_info_t *sview_resv_info_ptr = NULL;
reserve_info_t *resv_ptr = NULL;
@@ -1085,7 +1120,6 @@
error_code = get_new_info_resv(&resv_info_ptr, force_refresh);
if (error_code == SLURM_NO_CHANGE_IN_DATA) {
- changed = 0;
} else if (error_code != SLURM_SUCCESS) {
if (view == ERROR_VIEW)
goto end_it;
@@ -1102,7 +1136,7 @@
}
display_it:
- info_list = _create_resv_info_list(resv_info_ptr, changed);
+ info_list = _create_resv_info_list(resv_info_ptr);
if (!info_list)
goto reset_curs;
/* set up the grid */
@@ -1185,7 +1219,6 @@
GtkTreeView *tree_view = NULL;
List resv_list = NULL;
List send_resv_list = NULL;
- int changed = 1;
sview_resv_info_t *sview_resv_info_ptr = NULL;
int j=0, i=-1;
hostset_t hostset = NULL;
@@ -1206,7 +1239,6 @@
== SLURM_NO_CHANGE_IN_DATA) {
if (!spec_info->display_widget || spec_info->view == ERROR_VIEW)
goto display_it;
- changed = 0;
} else if (resv_error_code != SLURM_SUCCESS) {
if (spec_info->view == ERROR_VIEW)
goto end_it;
@@ -1226,7 +1258,7 @@
display_it:
- resv_list = _create_resv_info_list(resv_info_ptr, changed);
+ resv_list = _create_resv_info_list(resv_info_ptr);
if (!resv_list)
return;
@@ -1393,7 +1425,7 @@
case NODE_PAGE:
if (cluster_flags & CLUSTER_FLAG_BG)
snprintf(title, 100,
- "Base partitions(s) in reservation %s",
+ "Midplane(s) in reservation %s",
name);
else
snprintf(title, 100, "Node(s) in reservation %s ",
@@ -1625,7 +1657,7 @@
if (cluster_flags & CLUSTER_FLAG_BG) {
switch(display_data->id) {
case SORTID_NODELIST:
- display_data->name = "BP List";
+ display_data->name = "MidplaneList";
break;
default:
break;
@@ -1651,7 +1683,7 @@
display_data->name = "Blocks";
break;
case NODE_PAGE:
- display_data->name = "Base Partitions";
+ display_data->name = "Midplanes";
break;
}
} else {
diff --git a/src/sview/submit_info.c b/src/sview/submit_info.c
index c0ef205..922d11c 100644
--- a/src/sview/submit_info.c
+++ b/src/sview/submit_info.c
@@ -48,7 +48,7 @@
{G_TYPE_STRING, SORTID_TIMELIMIT, "TIMELIMIT", TRUE, -1},
{G_TYPE_STRING, SORTID_NODES, "NODES", TRUE, -1},
#ifdef HAVE_BG
- {G_TYPE_STRING, SORTID_NODELIST, "BP_LIST", TRUE, -1},
+ {G_TYPE_STRING, SORTID_NODELIST, "MIDPLANELIST", TRUE, -1},
#else
{G_TYPE_STRING, SORTID_NODELIST, "NODELIST", TRUE, -1},
#endif
diff --git a/src/sview/sview.c b/src/sview/sview.c
index d0f2aac..07ad51e 100644
--- a/src/sview/sview.c
+++ b/src/sview/sview.c
@@ -130,7 +130,7 @@
refresh_main, NULL, NULL,
get_info_block, specific_info_block,
set_menus_block, NULL},
- {G_TYPE_NONE, NODE_PAGE, "Base Partitions", FALSE, -1,
+ {G_TYPE_NONE, NODE_PAGE, "Midplanes", FALSE, -1,
refresh_main, NULL, NULL,
get_info_node, specific_info_node,
set_menus_node, NULL},
@@ -592,6 +592,14 @@
gtk_toggle_action_set_active(toggle_action, new_state);
debug_action = gtk_action_group_get_action(menu_action_group,
+ "flags_no_real_time");
+ toggle_action = GTK_TOGGLE_ACTION(debug_action);
+ orig_state = gtk_toggle_action_get_active(toggle_action);
+ new_state = debug_flags & DEBUG_FLAG_NO_REALTIME;
+ if (orig_state != new_state)
+ gtk_toggle_action_set_active(toggle_action, new_state);
+
+ debug_action = gtk_action_group_get_action(menu_action_group,
"flags_prio");
toggle_action = GTK_TOGGLE_ACTION(debug_action);
orig_state = gtk_toggle_action_get_active(toggle_action);
@@ -723,6 +731,10 @@
{
_set_flags(action, DEBUG_FLAG_NO_CONF_HASH);
}
+static void _set_flags_no_real_time(GtkToggleAction *action)
+{
+ _set_flags(action, DEBUG_FLAG_NO_REALTIME);
+}
static void _set_flags_prio(GtkToggleAction *action)
{
_set_flags(action, DEBUG_FLAG_PRIO);
@@ -874,6 +886,7 @@
" <menuitem action='flags_gang'/>"
" <menuitem action='flags_gres'/>"
" <menuitem action='flags_no_conf_hash'/>"
+ " <menuitem action='flags_no_real_time'/>"
" <menuitem action='flags_prio'/>"
" <menuitem action='flags_reservation'/>"
" <menuitem action='flags_select_type'/>"
@@ -892,7 +905,8 @@
" <menuitem action='topoorder'/>"
#endif
" <menuitem action='ruled'/>");
- if (!(cluster_flags & CLUSTER_FLAG_BG))
+ if (!(cluster_flags & CLUSTER_FLAG_BG) &&
+ !(cluster_flags & CLUSTER_FLAG_CRAYXT))
xstrcat(ui_description,
" <menuitem action='grid_specs'/>");
@@ -1004,12 +1018,12 @@
"Search for BG Blocks having given state",
G_CALLBACK(create_search_popup)},
{"node_name_bg", NULL,
- "Base Partition(s) Name",
- "", "Search for a specific Base Partition(s)",
+ "Midplane(s) Name",
+ "", "Search for a specific Midplane(s)",
G_CALLBACK(create_search_popup)},
{"node_state_bg", NULL,
- "Base Partition State",
- "", "Search for a Base Partition in a given state",
+ "Midplane State",
+ "", "Search for a Midplane in a given state",
G_CALLBACK(create_search_popup)},
};
@@ -1110,6 +1124,8 @@
"Gres", G_CALLBACK(_set_flags_gres), FALSE},
{"flags_no_conf_hash", NULL, "NO CONF HASH", NULL,
"NO_CONF_HASH", G_CALLBACK(_set_flags_no_conf_hash), FALSE},
+ {"flags_no_real_time", NULL, "NoRealTime", NULL,
+ "NoRealTime", G_CALLBACK(_set_flags_no_real_time), FALSE},
{"flags_prio", NULL, "Priority", NULL,
"Priority", G_CALLBACK(_set_flags_prio), FALSE},
{"flags_reservation", NULL, "Reservation", NULL,
@@ -1324,6 +1340,8 @@
working_sview_config.grid_hori = default_sview_config.grid_hori;
working_sview_config.grid_vert = default_sview_config.grid_vert;
}
+ gtk_table_set_col_spacings(main_grid_table, 0);
+ gtk_table_set_row_spacings(main_grid_table, 0);
if (!orig_cluster_name)
orig_cluster_name = slurm_get_cluster_name();
@@ -1344,7 +1362,7 @@
display_data->show = TRUE;
break;
case NODE_PAGE:
- display_data->name = "Base Partitions";
+ display_data->name = "Midplanes";
break;
default:
break;
@@ -1481,7 +1499,7 @@
g_free(tmp);
}
-static GtkWidget *_create_cluster_combo()
+static GtkWidget *_create_cluster_combo(void)
{
GtkListStore *model = NULL;
GtkWidget *combo = NULL;
diff --git a/src/sview/sview.h b/src/sview/sview.h
index 976f5a7..60230d8 100644
--- a/src/sview/sview.h
+++ b/src/sview/sview.h
@@ -303,14 +303,15 @@
} grid_button_t;
typedef struct {
- node_info_t *node_ptr;
- char *color;
- int pos;
char *boot_time;
- char *reason;
- char *slurmd_start_time;
- bool iter_set;
+ char *color;
GtkTreeIter iter_ptr;
+ bool iter_set;
+ node_info_t *node_ptr;
+ int pos;
+ char *reason;
+ char *rack_mp;
+ char *slurmd_start_time;
} sview_node_info_t;
typedef struct {
@@ -512,7 +513,7 @@
extern void refresh_node(GtkAction *action, gpointer user_data);
/* don't destroy the list from this function */
extern List create_node_info_list(node_info_msg_t *node_info_ptr,
- int changed, bool by_partition);
+ bool by_partition);
extern int update_features_node(GtkDialog *dialog, const char *nodelist,
const char *old_features);
extern int update_state_node(GtkDialog *dialog,
@@ -625,6 +626,7 @@
extern gboolean delete_popup(GtkWidget *widget, GtkWidget *event, char *title);
extern gboolean delete_popups(void);
extern void *popup_thr(popup_info_t *popup_win);
+extern void set_for_update(GtkTreeModel *model, int updated);
extern void remove_old(GtkTreeModel *model, int updated);
extern GtkWidget *create_pulldown_combo(display_data_t *display_data,
int count);
diff --git a/testsuite/Makefile.in b/testsuite/Makefile.in
index 01a53e2..d0b207f 100644
--- a/testsuite/Makefile.in
+++ b/testsuite/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -208,6 +208,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -244,6 +245,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -514,7 +516,7 @@
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
check-DEJAGNU: site.exp
- srcdir=`$(am__cd) $(srcdir) && pwd`; export srcdir; \
+ srcdir='$(srcdir)'; export srcdir; \
EXPECT=$(EXPECT); export EXPECT; \
runtest=$(RUNTEST); \
if $(SHELL) -c "$$runtest --version" > /dev/null 2>&1; then \
@@ -525,12 +527,12 @@
else echo "WARNING: could not find \`runtest'" 1>&2; :;\
fi; \
exit $$exit_status
-site.exp: Makefile
+site.exp: Makefile $(EXTRA_DEJAGNU_SITE_CONFIG)
@echo 'Making a new site.exp file...'
@echo '## these variables are automatically generated by make ##' >site.tmp
@echo '# Do not edit here. If you wish to override these values' >>site.tmp
@echo '# edit the last section' >>site.tmp
- @echo 'set srcdir $(srcdir)' >>site.tmp
+ @echo 'set srcdir "$(srcdir)"' >>site.tmp
@echo "set objdir `pwd`" >>site.tmp
@echo 'set build_alias "$(build_alias)"' >>site.tmp
@echo 'set build_triplet $(build_triplet)' >>site.tmp
@@ -538,9 +540,16 @@
@echo 'set host_triplet $(host_triplet)' >>site.tmp
@echo 'set target_alias "$(target_alias)"' >>site.tmp
@echo 'set target_triplet $(target_triplet)' >>site.tmp
- @echo '## All variables above are generated by configure. Do Not Edit ##' >>site.tmp
- @test ! -f site.exp || \
- sed '1,/^## All variables above are.*##/ d' site.exp >> site.tmp
+ @list='$(EXTRA_DEJAGNU_SITE_CONFIG)'; for f in $$list; do \
+ echo "## Begin content included from file $$f. Do not modify. ##" \
+ && cat `test -f "$$f" || echo '$(srcdir)/'`$$f \
+ && echo "## End content included from file $$f. ##" \
+ || exit 1; \
+ done >> site.tmp
+ @echo "## End of auto-generated content; you can edit from here. ##" >> site.tmp
+ @if test -f site.exp; then \
+ sed -e '1,/^## End of auto-generated content.*##/d' site.exp >> site.tmp; \
+ fi
@-rm -f site.bak
@test ! -f site.exp || mv site.exp site.bak
@mv site.tmp site.exp
@@ -625,10 +634,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am
index 2f159b4..5f452d9 100644
--- a/testsuite/expect/Makefile.am
+++ b/testsuite/expect/Makefile.am
@@ -14,6 +14,7 @@
pkill \
README \
regression \
+ regression.py \
test1.1 \
test1.2 \
test1.3 \
@@ -50,8 +51,10 @@
test1.32.prog.c \
test1.33 \
test1.34 \
+ test1.34.prog.c \
test1.35 \
test1.36 \
+ test1.37 \
test1.38 \
test1.40 \
test1.41 \
@@ -66,6 +69,8 @@
test1.50 \
test1.51 \
test1.52 \
+ test1.53 \
+ test1.53.prog.c \
test1.54 \
test1.55 \
test1.56 \
@@ -97,6 +102,9 @@
test1.91.prog.c \
test1.92 \
test1.93 \
+ test1.94 \
+ test1.94.master.c \
+ test1.94.slave.c \
test2.1 \
test2.2 \
test2.3 \
@@ -209,6 +217,7 @@
test8.21.bash \
test8.22 \
test8.23 \
+ test8.24 \
test9.1 \
test9.2 \
test9.3 \
@@ -218,6 +227,7 @@
test9.7 \
test9.7.bash \
test9.8 \
+ test9.9 \
test10.1 \
test10.2 \
test10.3 \
@@ -241,6 +251,9 @@
test12.1 \
test12.2 \
test12.2.prog.c \
+ test12.3 \
+ inc12.3.1 \
+ inc12.3.2 \
test12.4 \
test12.5 \
test13.1 \
@@ -323,6 +336,10 @@
test19.5 \
test19.6 \
test19.7 \
+ test20.1 \
+ test20.2 \
+ test20.3 \
+ test20.4 \
test21.1 \
test21.2 \
test21.3 \
@@ -362,6 +379,11 @@
test25.1 \
test26.1 \
test26.2 \
+ test27.1 \
+ test27.2 \
+ test27.3 \
+ test27.4 \
+ test27.5 \
usleep
distclean-local:
diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in
index 95c394d..af1fbf5 100644
--- a/testsuite/expect/Makefile.in
+++ b/testsuite/expect/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -161,6 +161,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -197,6 +198,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -298,6 +300,7 @@
pkill \
README \
regression \
+ regression.py \
test1.1 \
test1.2 \
test1.3 \
@@ -334,8 +337,10 @@
test1.32.prog.c \
test1.33 \
test1.34 \
+ test1.34.prog.c \
test1.35 \
test1.36 \
+ test1.37 \
test1.38 \
test1.40 \
test1.41 \
@@ -350,6 +355,8 @@
test1.50 \
test1.51 \
test1.52 \
+ test1.53 \
+ test1.53.prog.c \
test1.54 \
test1.55 \
test1.56 \
@@ -381,6 +388,9 @@
test1.91.prog.c \
test1.92 \
test1.93 \
+ test1.94 \
+ test1.94.master.c \
+ test1.94.slave.c \
test2.1 \
test2.2 \
test2.3 \
@@ -493,6 +503,7 @@
test8.21.bash \
test8.22 \
test8.23 \
+ test8.24 \
test9.1 \
test9.2 \
test9.3 \
@@ -502,6 +513,7 @@
test9.7 \
test9.7.bash \
test9.8 \
+ test9.9 \
test10.1 \
test10.2 \
test10.3 \
@@ -525,6 +537,9 @@
test12.1 \
test12.2 \
test12.2.prog.c \
+ test12.3 \
+ inc12.3.1 \
+ inc12.3.2 \
test12.4 \
test12.5 \
test13.1 \
@@ -607,6 +622,10 @@
test19.5 \
test19.6 \
test19.7 \
+ test20.1 \
+ test20.2 \
+ test20.3 \
+ test20.4 \
test21.1 \
test21.2 \
test21.3 \
@@ -646,6 +665,11 @@
test25.1 \
test26.1 \
test26.2 \
+ test27.1 \
+ test27.2 \
+ test27.3 \
+ test27.4 \
+ test27.5 \
usleep
all: all-am
@@ -738,10 +762,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/testsuite/expect/README b/testsuite/expect/README
index da2bc85..04c6537 100644
--- a/testsuite/expect/README
+++ b/testsuite/expect/README
@@ -193,6 +193,7 @@
test1.91 Test of CPU affinity for multi-core systems.
test1.92 Test of task distribution support on multi-core systems.
test1.93 Test of LAM-MPI functionality
+test1.94 Test of MPICH2 task spawn logic
**NOTE** The above tests for mutliple processor/partition systems only
test2.# Testing of scontrol options (to be run as unprivileged user).
@@ -261,7 +262,7 @@
test5.4 Test squeue formating options (--noheader, --format and --step
options and SQUEUE_FORMAT environment variable).
test5.5 Test squeue sorting (--sort option).
-test5.6 Test squeue filtering (--jobs, --node, --states, --steps and
+test5.6 Test squeue filtering (--jobs, --nodelist, --states, --steps and
--user options).
test5.7 Confirm that squeue verbose option works (--verbose option).
test5.8 Confirm that squeue reports a proper version number (--version
@@ -318,7 +319,6 @@
of a job record in the database
test7.15 Verify signal mask of tasks have no ignored signals.
-
test8.# Test of Blue Gene specific functionality.
=================================================
test8.1 Test of Blue Gene specific sbatch command line options
@@ -338,6 +338,8 @@
within the job's allocation
test8.23 Bluegene/Q only: Test that multple jobs allocations are properly
packed within a midplane
+test8.24 Bluegene/Q only: Test advanced reservation creation with multiple
+ blocks.
test9.# System stress testing. Exercises all commands and daemons.
@@ -350,6 +352,7 @@
test9.6 Stress test of per-task output files.
test9.7 Stress test multiple simultaneous commands via multiple threads.
test9.8 Stress test with maximum slurmctld message concurrency.
+test9.9 Throughput test for 2000 jobs for timing
test10.# Testing of smap options.
@@ -387,7 +390,7 @@
test12.1 Test sacct --help option.
test12.2 Test validity/accuracy of accounting data for exit code,
memory and real-time information along with stating a running job.
-test12.3 (EMPTY)
+test12.3 Test sacct filtering of records by account and job name.
test12.4 Test sacct --b, g, j, l, n, p, u, v options.
test12.5 Test sacct --helpformat option.
@@ -531,7 +534,7 @@
test21.# Testing of sacctmgr commands and options.
-=================================================
+====================================================
test21.1 sacctmgr --usage
test21.2 sacctmgr --help
test21.3 sacctmgr -V
@@ -563,28 +566,41 @@
test21.28 sacctmgr abort delete
test21.29 sacctmgr clear (modify) QoS values
+
test22.# Testing of sreport commands and options.
These also test the sacctmgr archive dump/load functions.
-=================================================
+==================================================
test22.1 sreport cluster utilization report
test22.2 sreport h, n, p, P, t, V options
+
test23.# Testing of sstat commands and options.
=================================================
test23.1 sstat -e, h, usage and V options
-test23.2 sstat --helpformat
-test23.3 sstat -a, n, o, p, P, v options
+test23.2 sstat -a, n, o, p, P, v options
+
test24.# Testing of sshare/priority multifactor plugin.
-=================================================
+=========================================================
test24.1 multifactor plugin algo test
test24.2 sshare h, n, p, P, v, and V options.
+
test25.# Testing of sprio command and options.
-=================================================
+================================================
test25.1 sprio all options
+
test26.# Test of Cray specific functionality.
================================================
test26.1 Validate scontrol update command for nodes is disabled.
test26.2 Test of srun/aprun wrapper use of --alps= option
+
+
+test27.# Testing of sdiag commands and options.
+=================================================
+test27.1 sdiag --usage
+test27.2 sdiag --help
+test27.3 sdiag --version
+test27.4 sdiag --all (default output)
+test27.5 sdiag --reset
diff --git a/testsuite/expect/globals b/testsuite/expect/globals
index 219ba51..3cff158 100755
--- a/testsuite/expect/globals
+++ b/testsuite/expect/globals
@@ -45,7 +45,17 @@
global sacctmgr sacct salloc sattach sbatch sbcast scancel scontrol sinfo smap squeue sreport srun sstat strigger
-# Conditional set. Only set variable if variable does not yet exist.
+################################################################
+#
+# Proc: cset
+#
+# Purpose: Conditional set. Only set variable if variable does not yet exist.
+#
+# Input: name -- name of the variable to set
+# value -- value to set to 'name'
+#
+################################################################
+
proc cset {name value} {
if {![uplevel 1 info exists $name]} {
upvar $name tmp
@@ -78,6 +88,7 @@
cset sbcast "${slurm_dir}/bin/sbcast"
cset scancel "${slurm_dir}/bin/scancel"
cset scontrol "${slurm_dir}/bin/scontrol"
+cset sdiag "${slurm_dir}/bin/sdiag"
cset sinfo "${slurm_dir}/bin/sinfo"
cset smap "${slurm_dir}/bin/smap"
cset sprio "${slurm_dir}/bin/sprio"
@@ -196,9 +207,10 @@
set alpha_under "\[A-Z_\]+"
set digit "\[0-9\]"
set end_of_line "\[\r\n\]"
+set float "\[0-9\]+\\.?\[0-9\]*"
set number "\[0-9\]+"
set number_with_suffix "\[0-9\]+\[KM\]*"
-set float "\[0-9\]+\\.?\[0-9\]+"
+set slash "/"
set whitespace "\[ \t\n\r\f\v\]+"
set alpha_numeric_nodelist "$alpha_numeric_under\\\[?\[$alpha_numeric_comma\]?\\\]?"
#
@@ -614,7 +626,7 @@
if {[regexp {Nodes=} $line foo] == 1} {
return 0
}
- if {[regexp {BP_List=} $line foo] == 1} {
+ if {[regexp {MidplaneList=} $line foo] == 1} {
return 0
}
if { $my_delay > $max_job_state_delay } {
@@ -632,6 +644,81 @@
}
}
+################################################################
+#
+# Proc: wait_for_all_jobs
+#
+# Purpose: Wait for previously submitted SLURM jobs of a certain name
+# to finish; if incr_sleep is set, exponentially back off from 1 to 10 seconds
+#
+# Returns: A non-zero return code indicates a failure.
+#
+# Input: job_name -- The name of job to wait for.
+# incr_sleep -- To exponentially back-off or not
+#
+#
+################################################################
+# Wait up to 900 seconds for all jobs to terminate
+# Return 0 if all jobs done, remaining job count otherwise
+proc wait_for_all_jobs { job_name incr_sleep } {
+ global scancel squeue bin_sleep
+
+ set matches 0
+ set sleep_time 1
+ set my_delay 0
+ set last_matches 0
+ set timeout 30
+ send_user "Waiting for all jobs to terminate\n"
+ for {set inx 0} {$inx < 600} {incr inx} {
+ log_user 0
+ set matches 0
+ spawn $squeue -o %j
+ expect {
+ -re "$job_name" {
+ incr matches
+ exp_continue
+ }
+ -re "error" {
+ set matches -1
+ }
+ timeout {
+ send_user "No response from squeue\n"
+ set matches -1
+ }
+ eof {
+ wait
+ }
+ }
+ log_user 1
+ if {$matches == 0} {
+ send_user "All jobs complete\n"
+ break
+ }
+ if {$matches > 0} {
+ send_user " $matches jobs remaining\n"
+# Moab can slow throughput down a lot,
+# so don't return here
+# if {$matches == $last_matches} {
+# send_user "Running jobs hung\n"
+# break
+# }
+# set last_matches $matches
+ exec sleep $sleep_time
+ set my_delay [expr $my_delay + $sleep_time]
+ if { $incr_sleep } {
+ set sleep_time [expr $sleep_time * 2]
+ if { $sleep_time > 10 } {
+ set sleep_time 10
+ }
+ }
+ }
+ }
+ if {$matches != 0} {
+ exec $scancel -n $job_name
+ }
+ return $matches
+}
+
################################################################
#
@@ -666,13 +753,11 @@
################################################################
#
-# Proc: test_assoc_enforced
+# Proc: test_limits_enforced
#
-# Purpose: Determine if we need an association to run a job.
-# This is based upon
-# the value of AccountingStorageEnforce in the slurm.conf.
+# Purpose: Check if AccountingStorageEnforce limits is set
#
-# Returns level of association enforcement, 0 if none
+# Returns 1 if limits is set, else 0
#
################################################################
proc test_limits_enforced { } {
@@ -1200,6 +1285,36 @@
################################################################
#
+# Proc: test_serial
+#
+# Purpose: Determine if the system runs only serial jobs
+#
+# Returns 1 if the system is serial, 0 otherwise
+#
+################################################################
+
+proc test_serial { } {
+ global scontrol bin_bash bin_grep
+
+ log_user 0
+ set serial 0
+ spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SelectType"
+ expect {
+ "select/serial" {
+ set serial 1
+ exp_continue
+ }
+ eof {
+ wait
+ }
+ }
+ log_user 1
+
+ return $serial
+}
+
+################################################################
+#
# Proc: test_emulated
#
# Purpose: Determine if the system is emulated (not running on
@@ -1404,10 +1519,20 @@
return $super_user
}
-# Create a 16 bit hex number from a signed decimal number
-# Replace all non-decimal characters
+################################################################
+#
+# Proc: dec2hex16
+#
+# Purpose: Create a 16 bit hex number from a signed decimal number
+#
+# Returns: 16 bit hex version of input 'value'
+#
+# Input: value -- decimal number to convert
+#
# Courtesy of Chris Cornish
# http://aspn.activestate.com/ASPN/Cookbook/Tcl/Recipe/415982
+################################################################
+# Replace all non-decimal characters
proc dec2hex16 {value} {
regsub -all {[^0-x\.-]} $value {} newtemp
set value [string trim $newtemp]
@@ -1421,10 +1546,20 @@
}
}
-# Create a 32 bit hex number from a signed decimal number
-# Replace all non-decimal characters
+################################################################
+#
+# Proc: dec2hex32
+#
+# Purpose: Create a 32 bit hex number from a signed decimal number
+#
+# Returns: 32 bit hex version of input 'value'
+#
+# Input: value -- decimal number to convert
+#
# Courtesy of Chris Cornish
# http://aspn.activestate.com/ASPN/Cookbook/Tcl/Recipe/415982
+################################################################
+# Replace all non-decimal characters
proc dec2hex {value} {
regsub -all {[^0-x\.-]} $value {} newtemp
set value [string trim $newtemp]
@@ -1467,6 +1602,32 @@
return $available
}
+################################################################
+#
+# Proc: partition_shared
+#
+# Purpose: Determine the shared configuration of the specified
+# partition
+#
+# Returns: The shared configuration of the specified
+# partition
+#
+#
+# Input: partition - name of a partition
+#
+################################################################
+
+proc partition_shared { partition } {
+ global sinfo
+
+ set shared "No"
+ send_user "$sinfo --noheader --partition $partition --format %h\n"
+ set fd [open "|$sinfo --noheader --partition $partition --format %h"]
+ gets $fd line
+ catch {close $fd}
+ regexp {[a-zA-Z]+} $line shared
+ return $shared
+}
################################################################
#
@@ -1559,7 +1720,15 @@
}
################################################################
-# Given a hostname, return it's numeric suffix
+#
+# Proc: get_suffix
+#
+# Purpose: Given a hostname, return its numeric suffix
+#
+# Returns: numerical suffix for input 'hostname'
+#
+# Input: hostname -- hostname for which to return suffix
+#
################################################################
proc get_suffix { hostname } {
set host_len [string length $hostname]
@@ -1587,9 +1756,13 @@
}
################################################################
-# Check if we are user root or SlurmUser
#
-# Return 1 if true, 0 if false
+# Proc: is_super_user
+#
+# Purpose: Check if we are user root or SlurmUser
+#
+# Returns: 1 if true, 0 if false
+#
################################################################
proc is_super_user { } {
@@ -1629,7 +1802,11 @@
################################################################
#
+# Proc: check_acct_associations
#
+# Purpose: Use sacctmgr to check associations
+#
+# Returns: 0 on any error
#
################################################################
proc check_acct_associations { } {
@@ -1703,7 +1880,11 @@
################################################################
#
+# Proc:check_accounting_admin_level
#
+# Purpose: get the admin_level for the current user
+#
+# Returns: admin_level for the current user
#
################################################################
proc check_accounting_admin_level { } {
@@ -1755,7 +1936,11 @@
################################################################
#
+# Proc: get_cluster_name
#
+# Purpose: get the cluster name
+#
+# Returns: name of the cluster
#
################################################################
proc get_cluster_name { } {
@@ -1965,6 +2150,41 @@
################################################################
#
+# Proc: get_bluegene_allow_sub_blocks
+#
+# Purpose: See if the BlueGene system allows sub blocks
+#
+# Returns 0 for no and 1 for yes.
+#
+################################################################
+
+proc get_bluegene_allow_sub_blocks { } {
+ global scontrol alpha
+
+ log_user 0
+ set type 0
+ set scon_pid [spawn -noecho $scontrol show config]
+ expect {
+ -re "AllowSubBlockAllocations" {
+ set type 1
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: scontrol not responding\n"
+ slow_kill $scon_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+ log_user 1
+
+ return $type
+}
+
+################################################################
+#
# Proc: get_node_cnt
#
# Purpose: Determine how many nodes are on the system
@@ -2000,6 +2220,42 @@
################################################################
#
+# Proc: get_node_cnt_in_part
+#
+# Purpose: Determine how many nodes are in a given partition
+#
+# Returns count of nodes in a partition or 0 if unknown
+#
+################################################################
+
+proc get_node_cnt_in_part { partition } {
+ global scontrol number
+
+# log_user 0
+ set node_cnt 0
+ set scon_pid [spawn -noecho $scontrol show partition $partition]
+ expect {
+ -re "not found" {
+ send_user "\nFAILURE: partition $partition doesn't exist\n"
+ }
+ -re "TotalNodes=($number)" {
+ set node_cnt $expect_out(1,string)
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: scontrol not responding\n"
+ }
+ eof {
+ }
+ }
+# log_user 1
+
+ return $node_cnt
+}
+
+
+################################################################
+#
# Proc: print_success
#
# Purpose: Print success with test ID
diff --git a/testsuite/expect/inc12.3.1 b/testsuite/expect/inc12.3.1
new file mode 100644
index 0000000..c397d28
--- /dev/null
+++ b/testsuite/expect/inc12.3.1
@@ -0,0 +1,85 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test for accounting records of specific job names with their ID
+############################################################################
+# Copyright (C) 2012 SchedMD LLC.
+# Written by Nathan Yee <nyee32@schedmd.com>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+source ./globals_accounting
+
+proc inc12_3_1 {job_id_1 job_id_2 job_name_1 job_name_2 test_acct} {
+
+ global sacct
+ global exit_code
+
+ set idmatch 0
+ send_user "\nSearch for job ID $job_id_1 having name $job_name_1\n"
+ send_user "\nJob id1 number: $job_id_1\n"
+ spawn $sacct -A '$test_acct' --name=$job_name_1 -p -o jobid
+ expect {
+ -re "$job_id_1" {
+ incr idmatch
+ exp_continue
+ }
+ -re "$job_id_2" {
+ incr idmatch
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacct not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+
+ }
+ if {$idmatch != 1} {
+ send_user "\nFAILURE: sacct could not match job id to job name\n"
+ set exit_code 1
+ }
+
+ set idmatch 0
+ send_user "\nSearch for job ID $job_id_2 having name $job_name_2\n"
+ spawn $sacct -A '$test_acct' --name=$job_name_2 -p -o jobid
+ expect {
+ -re "$job_id_1" {
+ incr idmatch
+ exp_continue
+ }
+ -re "$job_id_2" {
+ incr idmatch
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacct not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+ if {$idmatch != 1} {
+ send_user "\nFAILURE: sacct could not match job id to job name\n"
+ set exit_code 1
+ }
+}
diff --git a/testsuite/expect/inc12.3.2 b/testsuite/expect/inc12.3.2
new file mode 100755
index 0000000..c2f655a
--- /dev/null
+++ b/testsuite/expect/inc12.3.2
@@ -0,0 +1,146 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test for accounting records of specific jobs on specific nodes
+# This is a sub-test of test12.3
+############################################################################
+# Copyright (C) 2012 SchedMD LLC.
+# Written by Nathan Yee <nyee32@schedmd.com>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+source ./globals_accounting
+
+proc inc12_3_2 {job_id_1 job_id_2 job_id_3 job_name_1 job_name_2 test_acct job_1_node_0 job_1_node_2 job_2_node_0 job_2_node_1} {
+
+ global alpha_numeric_under sacct
+ global exit_code
+
+ send_user "\nSearch for jobs $job_id_1 and $job_id_3 on node: $job_1_node_0\n"
+ set job_1_match 0
+ set job_2_match 0
+ spawn $sacct -A '$test_acct' -N$job_1_node_0 -p -o jobid,jobname
+ expect {
+ -re "$job_id_1" {
+ incr job_1_match
+ exp_continue
+ }
+ -re "$job_id_2" {
+ incr job_2_match
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacct did not respond\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+ if {$job_1_match != 1} {
+ send_user "\nFAILURE:sacct was unable to find the job $job_id_1 on node $job_1_node_0 ($job_1_match != 1)\n"
+ set exit_code 1
+ }
+ if {$job_2_match != 0} {
+ send_user "\nFAILURE:sacct was found job $job_id_2 on node $job_1_node_0 ($job_2_match != 0)\n"
+ set exit_code 1
+ }
+
+
+ send_user "\nSearch for jobs $job_id_1 and $job_id_2 on node: $job_1_node_2\n"
+ set job_1_match 0
+ set job_2_match 0
+ spawn $sacct -A '$test_acct' -N$job_1_node_2 -p -o jobid,jobname
+ expect {
+ -re "$job_id_1" {
+ incr job_1_match
+ exp_continue
+ }
+ -re "$job_id_2" {
+ incr job_2_match
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacct did not respond\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+ if {$job_1_match != 1} {
+ send_user "\nFAILURE:sacct was unable to find the job $job_id_1 on node $job_1_node_2 ($job_1_match != 1)\n"
+ set exit_code 1
+ }
+
+
+ send_user "\nSearch for jobs $job_id_1 and $job_id_2 on node: $job_2_node_0\n"
+ set job_1_match 0
+ set job_2_match 0
+ spawn $sacct -A '$test_acct' -N$job_2_node_0 -p -o jobid,jobname
+ expect {
+ -re "$job_id_1" {
+ incr job_1_match
+ exp_continue
+ }
+ -re "$job_id_2" {
+ incr job_2_match
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacct did not respond\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+ if {$job_2_match != 1} {
+ send_user "\nFAILURE:sacct was unable to find the job $job_id_2 on node $job_2_node_0 ($job_2_match != 1)\n"
+ set exit_code 1
+ }
+
+
+ send_user "\nSearch for jobs $job_id_2 and $job_id_3 on node: $job_2_node_1\n"
+ set job_1_match 0
+ set job_2_match 0
+ spawn $sacct -A '$test_acct' -N$job_2_node_1 -p -o jobid,jobname
+ expect {
+ -re "$job_id_1" {
+ incr job_1_match
+ exp_continue
+ }
+ -re "$job_id_2" {
+ incr job_2_match
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacct did not respond\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+ if {$job_2_match != 1} {
+ send_user "\nFAILURE: sacct was unable to find the job $job_id_2 on node $job_2_node_1 ($job_2_match != 1)\n"
+ set exit_code 1
+ }
+}
+
diff --git a/testsuite/expect/inc22.1.1 b/testsuite/expect/inc22.1.1
new file mode 100644
index 0000000..fe3f530
--- /dev/null
+++ b/testsuite/expect/inc22.1.1
@@ -0,0 +1,356 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Use sreport to get cluster usage for the first hour from
+# configuration set up in test22.1.
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2008 Lawrence Livermore National Security.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Danny Auble <da@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+# Portions Copyright (C) 2012 SchedMD LLC
+# Written by Nathan Yee <nyee32@schedmd.com>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc22_1_1 {} {
+
+ global account1 accounts users cluster_cpus job1_start user1
+ global node0_down_start node0_cpus node1_cpus cluster sreport
+ global exit_code wc_key_track wckey1
+
+ # Mon Dec 31 23:00:00 2007
+ set period_start 1199170800
+
+ # Tue Jan 1 00:00:00 2008
+ set period_end 1199174400
+ set start_str [timestamp -format %Y-%m-%dT%X -seconds $period_start]
+ set end_str [timestamp -format %Y-%m-%dT%X -seconds $period_end]
+ set reported [expr ($period_end - $period_start) * $cluster_cpus]
+ set down [expr ($period_end-$node0_down_start) * $node0_cpus]
+ set alloc_sec [expr ($period_end-$job1_start) * $node1_cpus]
+ set wckey_alloc_sec $alloc_sec
+ set resv 0
+ set idle [expr $reported - ($down + $alloc_sec + $resv)]
+
+ set down [format "%d\\\(%.2f%%\\\)" $down [expr double($down * 100)/$reported]]
+ set alloc [format "%d\\\(%.2f%%\\\)" $alloc_sec [expr double($alloc_sec * 100)/$reported]]
+ set resv [format "%d\\\(%.2f%%\\\)" $resv [expr double($resv * 100)/$reported]]
+ set idle [format "%d\\\(%.2f%%\\\)" $idle [expr double($idle * 100)/$reported]]
+ set reported [format "%d\\\(%.2f%%\\\)" $reported 100]
+
+ send_user "\nTesting sreport for first hour\n"
+ set matches 0
+ set my_pid [eval spawn $sreport cluster utilization cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,idle,down,alloc,res,reported]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$idle.$down.$alloc.$resv.$reported." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info.\n"
+ incr exit_code 1
+ }
+
+ if {$exit_code} {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the UserUtilizationByAccount report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster UserUtilizationByAccount cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,account,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$user1.$account1.$alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 2.\n"
+ incr exit_code 1
+ }
+
+ if {$exit_code} {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the AccountUtilizationByUser report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster AccountUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.root..$alloc." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account1..$alloc." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account1.$user1.$alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 3} {
+ send_user "\nFAILURE: sreport didn't give good info 3.\n"
+ incr exit_code 1
+ }
+
+ if {$exit_code} {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the UserUtilizationByWcKey report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster UserUtilizationByWckey cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,wckey,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$user1.$wckey1.$alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$wc_key_track == 1 && $matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 4.\n"
+ incr exit_code 1
+ }
+
+ if {$exit_code} {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the WckeyUtilizationByUser report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster WckeyUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,wckey,login,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$wckey1..$alloc." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$wckey1.$user1.$alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sreport not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$wc_key_track == 1 && $matches != 2} {
+ send_user "\nFAILURE: sreport didn't give good info 5.\n"
+ incr exit_code 1
+ }
+
+ if {$exit_code} {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the User Top report
+ set matches 0
+ set my_pid [eval spawn $sreport user top cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$account1.$user1.$alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 6.\n"
+ incr exit_code 1
+ }
+
+ if {$exit_code} {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the Job Size report
+ set matches 0
+ set my_pid [eval spawn $sreport job size grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$account1.0.$alloc_sec.0." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 7 $matches.\n"
+ incr exit_code 1
+ }
+
+ if {$exit_code} {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the Job Size by wckey report
+ set matches 0
+ set my_pid [eval spawn $sreport job sizesbywckey grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$wckey1.0.$wckey_alloc_sec.0." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 8 $matches.\n"
+ incr exit_code 1
+ }
+
+ if {$exit_code} {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+}
diff --git a/testsuite/expect/inc22.1.2 b/testsuite/expect/inc22.1.2
new file mode 100644
index 0000000..45b3c88
--- /dev/null
+++ b/testsuite/expect/inc22.1.2
@@ -0,0 +1,333 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Use sreport to get cluster usage for the second hour from
+# configuration set up in test22.1.
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2008 Lawrence Livermore National Security.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Danny Auble <da@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+# Portions Copyright (C) 2012 SchedMD LLC
+# Written by Nathan Yee <nyee32@schedmd.com>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc22_1_2 {} {
+
+ global node0_down_end node0_cpus node1_cpus cluster sreport
+	global my_pid accounts users cluster_cpus sql_rem
+ global job1_end job1_cpus job2_elig job2_cpus account1 user1
+ global wckey1 wc_key_track exit_code
+
+ # Tue Jan 1 00:00:00 2008
+ set period_start 1199174400
+
+ # Tue Jan 1 01:00:00 2008
+ set period_end 1199178000
+ set start_str [timestamp -format %Y-%m-%dT%X -seconds $period_start]
+ set end_str [timestamp -format %Y-%m-%dT%X -seconds $period_end]
+
+ set reported [expr ($period_end - $period_start) * $cluster_cpus]
+ set down [expr ($node0_down_end-$period_start) * $node0_cpus]
+ set alloc_sec [expr ($job1_end-$period_start) * $job1_cpus]
+ set wckey_alloc_sec $alloc_sec
+ set resv [expr ($period_end - $job2_elig) * $job2_cpus]
+ set idle [expr $reported - ($down + $alloc_sec + $resv)]
+ # do the same logic inside the plugin to figure out the correct
+ # idle time and resv time
+ if {$idle < 0 } {
+ set resv [expr $resv + $idle]
+ set idle 0
+ if {$resv < 0} {
+ set resv 0
+ }
+ }
+ set down [format "%d\\\(%.2f%%\\\)" $down [expr double($down * 100)/$reported]]
+ set alloc [format "%d\\\(%.2f%%\\\)" $alloc_sec [expr double($alloc_sec * 100)/$reported]]
+ set resv [format "%d\\\(%.2f%%\\\)" $resv [expr double($resv * 100)/$reported]]
+ set idle [format "%d\\\(%.2f%%\\\)" $idle [expr double($idle * 100)/$reported]]
+ set reported [format "%d\\\(%.2f%%\\\)" $reported 100]
+
+ send_user "\nTesting sreport for second hour\n"
+ set matches 0
+ set my_pid [eval spawn $sreport cluster utilization cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,idle,down,alloc,res,reported]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$idle.$down.$alloc.$resv.$reported." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 9 $matches.\n"
+ send_user "we are looking for $cluster.$idle.$down.$alloc.$resv.$reported.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the AccountUtilizationByUser report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster AccountUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.root..$alloc." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account1..$alloc." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account1.$user1.$alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 3} {
+ send_user "\nFAILURE: sreport didn't give good info 11.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the UserUtilizationByWckey report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster UserUtilizationByWckey cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,wckey,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$user1.$wckey1.$alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$wc_key_track == 1 && $matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 12.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the WckeyUtilizationByUser report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster WckeyUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,wckey,login,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$wckey1..$alloc." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$wckey1.$user1.$alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$wc_key_track == 1 && $matches != 2} {
+ send_user "\nFAILURE: sreport didn't give good info 13.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the User Top report
+ set matches 0
+ set my_pid [eval spawn $sreport user top cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$account1.$user1.$alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 14.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the Job Size report
+ set matches 0
+ set my_pid [eval spawn $sreport job size grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$account1.0.$alloc_sec.0." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 15 $matches.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the Job Size by wckey report
+ set matches 0
+ set my_pid [eval spawn $sreport job sizesbywckey grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$wckey1.0.$wckey_alloc_sec.0." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 16 $matches.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+
+}
diff --git a/testsuite/expect/inc22.1.3 b/testsuite/expect/inc22.1.3
new file mode 100644
index 0000000..2f19fc4
--- /dev/null
+++ b/testsuite/expect/inc22.1.3
@@ -0,0 +1,462 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Use sreport to get cluster usage for the first 3 days from
+# configuration set up in test22.1.
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2008 Lawrence Livermore National Security.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Danny Auble <da@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+# Portions Copyright (C) 2012 SchedMD LLC
+# Written by Nathan Yee <nyee32@schedmd.com>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc22_1_3 { } {
+
+ global sreport exit_code node0_down_start node0_down_end node0_cpus
+	global node1_cpus cluster my_pid accounts users cluster_cpus sql_rem
+ global job1_start job1_end job1_cpus job1_alloc job2_start
+ global job2_end job2_cpus job2_alloc job2_elig job3_start
+ global job3_end job3_cpus job3_alloc job3_elig acct1_alloc acct2_alloc
+ global acct3_alloc total_alloc wckey1_alloc user1_wckey1_alloc
+ global user2_wckey1_alloc user1 user2 account1 account2 account3
+ global wckey1 wc_key_track
+
+ # Mon Dec 31 23:00:00 2007
+ set period_start 1199170800
+
+ # Tue Jan 3 00:00:00 2008
+ set period_end 1199347200
+ set start_str [timestamp -format %Y-%m-%dT%X -seconds $period_start]
+ set end_str [timestamp -format %Y-%m-%dT%X -seconds $period_end]
+
+ set reported [expr ($period_end - $period_start) * $cluster_cpus]
+ set down [expr ($node0_down_end - $node0_down_start) * $node0_cpus]
+ set alloc_sec [expr ($job1_end-$job1_start) * $job1_cpus]
+ set alloc_sec [expr $alloc_sec + (($job2_end-$job2_start) * $job2_cpus)]
+ set alloc_sec [expr $alloc_sec + (($job3_end-$job3_start) * $job3_cpus)]
+ set wckey_alloc_sec1 [expr $job1_alloc + $job3_alloc]
+ set wckey_alloc_sec2 $job2_alloc
+ set resv [expr ($job2_start - $job2_elig) * $job2_cpus]
+ set resv [expr $resv + (($job3_start - $job3_elig) * $job3_cpus)]
+ # I didn't have time to do the correct math here so I am just putting in 9000 which should be the correct value of over commit
+ set over 9000
+ set resv [expr $resv - $over]
+ set idle [expr $reported - ($down + $alloc_sec + $resv)]
+ # do the same logic inside the plugin to figure out the correct
+ # idle time and resv time
+ if {$idle < 0 } {
+ set resv [expr $resv + $idle]
+ set idle 0
+ if {$resv < 0} {
+ set resv 0
+ }
+ }
+ set down [format "%d\\\(%.2f%%\\\)" $down [expr double($down * 100)/$reported]]
+ set alloc [format "%d\\\(%.2f%%\\\)" $alloc_sec [expr double($alloc_sec * 100)/$reported]]
+ set resv [format "%d\\\(%.2f%%\\\)" $resv [expr double($resv * 100)/$reported]]
+ set idle [format "%d\\\(%.2f%%\\\)" $idle [expr double($idle * 100)/$reported]]
+ set over [format "%d\\\(%.2f%%\\\)" $over [expr double($over * 100)/$reported]]
+
+ set job1_alloc_str [format "%d\\\(%.2f%%\\\)" $job1_alloc [expr double($job1_alloc * 100)/$reported]]
+ set job2_alloc_str [format "%d\\\(%.2f%%\\\)" $job2_alloc [expr double($job2_alloc * 100)/$reported]]
+ set job3_alloc_str [format "%d\\\(%.2f%%\\\)" $job3_alloc [expr double($job3_alloc * 100)/$reported]]
+ set total_alloc_str [format "%d\\\(%.2f%%\\\)" $total_alloc [expr double($total_alloc * 100)/$reported]]
+ set acct1_alloc_str [format "%d\\\(%.2f%%\\\)" $acct1_alloc [expr double($acct1_alloc * 100)/$reported]]
+ set acct2_alloc_str [format "%d\\\(%.2f%%\\\)" $acct2_alloc [expr double($acct2_alloc * 100)/$reported]]
+ set acct3_alloc_str [format "%d\\\(%.2f%%\\\)" $acct3_alloc [expr double($acct3_alloc * 100)/$reported]]
+ set wckey1_alloc_str [format "%d\\\(%.2f%%\\\)" $wckey1_alloc [expr double($wckey1_alloc * 100)/$reported]]
+ set user1_wckey1_alloc_str [format "%d\\\(%.2f%%\\\)" $user1_wckey1_alloc [expr double($user1_wckey1_alloc * 100)/$reported]]
+ set user2_wckey1_alloc_str [format "%d\\\(%.2f%%\\\)" $user2_wckey1_alloc [expr double($user2_wckey1_alloc * 100)/$reported]]
+
+ set reported [format "%d\\\(%.2f%%\\\)" $reported 100]
+
+
+ send_user "\nTesting sreport for 3 days\n"
+ set matches 0
+ set my_pid [eval spawn $sreport cluster utilization cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,idle,down,alloc,res,over,reported]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$idle.$down.$alloc.$resv.$over.$reported." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 17 $matches.\n"
+ send_user "we are looking for $cluster.$idle.$down.$alloc.$resv.$over.$reported.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the UserUtilizationByAccount report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster UserUtilizationByAccount cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,account,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$user2.$account3.$job2_alloc_str." {
+ send_user "got 2\n"
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$user1.$account1.$job1_alloc_str." {
+ send_user "got 1\n"
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$user1.$account2.$job3_alloc_str." {
+ send_user "got 3\n"
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 3} {
+ send_user "\nFAILURE: sreport didn't give good info 18 $matches.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the AccountUtilizationByUser report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster AccountUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.root..$total_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account1..$acct1_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account1.$user1.$job1_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account2..$acct2_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account2.$user1.$job3_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account3..$acct3_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account3.$user2.$job2_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 7} {
+ send_user "\nFAILURE: sreport didn't give good info 19 $matches.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the UserUtilizationByWckey report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster UserUtilizationByWckey cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,wckey,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$user2.$wckey1.$user2_wckey1_alloc_str." {
+ send_user "got 2\n"
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$user1.$wckey1.$user1_wckey1_alloc_str." {
+ send_user "got 1\n"
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$wc_key_track == 1 && $matches != 2} {
+ send_user "\nFAILURE: sreport didn't give good info 20 $matches.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the WckeyUtilizationByUser report
+ set matches 0
+ set my_pid [eval spawn $sreport cluster WckeyUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,wckey,login,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$wckey1..$wckey1_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$wckey1.$user1.$user1_wckey1_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$wckey1.$user2.$user2_wckey1_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$wc_key_track == 1 && $matches != 3} {
+ send_user "\nFAILURE: sreport didn't give good info 21 $matches.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the User Top report
+ set matches 0
+ set my_pid [eval spawn $sreport user top cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$account3.$user2.$job2_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account1.$user1.$job1_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account2.$user1.$job3_alloc_str." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 3} {
+ send_user "\nFAILURE: sreport didn't give good info 22 $matches.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the Job Size report
+ set matches 0
+ set my_pid [eval spawn $sreport job size grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$account1.0.$job1_alloc.0." {
+ incr matches
+ exp_continue
+ }
+ -re "$cluster.$account2.0.$job3_alloc.$job2_alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 2} {
+ send_user "\nFAILURE: sreport didn't give good info 23 $matches.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the Job Size report down a level
+ set matches 0
+ set my_pid [eval spawn $sreport job size grouping=2,4 cluster='$cluster' account='$account2' start=$start_str end=$end_str -tsec -p -n]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$account3.0.0.$job2_alloc." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 1} {
+ send_user "\nFAILURE: sreport didn't give good info 24 $matches.\n"
+ incr exit_code 1
+ }
+
+ if { $exit_code } {
+ incr exit_code [archive_load $sql_rem]
+ remove_user "" "" $users
+ remove_acct "" $accounts
+ remove_cluster "$cluster"
+ exit $exit_code
+ }
+
+ # test the Job Size by wckey report
+ set matches 0
+ set my_pid [eval spawn $sreport job sizesbywckey grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
+ expect {
+ -re "There was a problem" {
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ incr exit_code 1
+ }
+ -re "$cluster.$wckey1.0.$wckey_alloc_sec1.$wckey_alloc_sec2." {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr archive load not responding\n"
+ slow_kill $my_pid
+ incr exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+}
diff --git a/testsuite/expect/inc22.1.4 b/testsuite/expect/inc22.1.4
new file mode 100644
index 0000000..d3fce87
--- /dev/null
+++ b/testsuite/expect/inc22.1.4
@@ -0,0 +1,111 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Test for jobs that ran on a node at a certain time from
+# configuration set up in test22.1.
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2008 Lawrence Livermore National Security.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Danny Auble <da@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+# Portions Copyright (C) 2012 SchedMD LLC
+# Written by Nathan Yee <nyee32@schedmd.com>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc22_1_4 {} {
+
+
+ global sacct job1_acct job2_acct job3_acct job1_start_str
+ global job1_end_str job2_start_str job2_end_str job3_start_str
+ global exit_code job3_end_str start_str end_str cluster
+
+
+ #Test for job 1
+ send_user "\nSearch for job1 on cluster $cluster\n"
+ set usermatch 0
+ spawn $sacct -p -C $cluster --state=completed --start=$job1_start_str --end=$job1_end_str --fields=node,jobname,jobid
+ expect {
+ -re "$job1_acct" {
+ incr usermatch
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacct not responding\n"
+ set exit_code 1
+ }
+	eof {
+		wait
+	}
+	}
+	if {$usermatch !=1} {
+		send_user "\nFAILURE: sacct did not work\n"
+		set exit_code 1
+	}
+
+ # Test for job2
+ send_user "\nSearch for job2 on cluster $cluster\n"
+ set usermatch 0
+ spawn $sacct -p -C $cluster --state=completed --start=$job2_start_str --end=$job2_end_str --fields=node,jobname,jobid
+ expect {
+ -re "$job2_acct" {
+ incr usermatch
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacct not responding\n"
+ set exit_code 1
+ }
+	eof {
+		wait
+	}
+	}
+	if {$usermatch !=1} {
+		send_user "\nFAILURE: sacct did not work\n"
+		set exit_code 1
+	}
+
+ # Test for job3
+ send_user "\nSearch for job3 on cluster $cluster\n"
+ set usermatch 0
+ spawn $sacct -p -C $cluster --state=completed --start=$job3_start_str --end=$job3_end_str --fields=node,jobname,jobid
+ expect {
+ -re "$job3_acct" {
+ incr usermatch
+ exp_continue
+ }
+ timeout {
+		send_user "\nFAILURE: sacct not responding\n"
+		set exit_code 1
+ }
+	eof {
+		wait
+	}
+	}
+	if {$usermatch !=1} {
+		send_user "\nFAILURE: sacct did not work\n"
+		set exit_code 1
+	}
+}
diff --git a/testsuite/expect/inc3.11.1 b/testsuite/expect/inc3.11.1
new file mode 100644
index 0000000..3e7ff84
--- /dev/null
+++ b/testsuite/expect/inc3.11.1
@@ -0,0 +1,97 @@
+############################################################################
+# Purpose: Test of SLURM functionality
+# to be called from test3.11
+# Make a list of lists with a series of parameters to test.
+# All the tests in goodtests should pass, all those in badtests
+# should fail.
+#
+############################################################################
+# Copyright (C) 2009 Lawrence Livermore National Security
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Dave Bremer <dbremer@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc3_11_1 {} {
+ global def_node user_name def_partition exit_code res_name
+
+ # TEST 1
+ # Make a list of lists with a series of parameters to test. All the tests
+ # in goodtests should pass, all those in badtests should fail.
+ #
+ send_user "\n+++++ STARTING TEST 1 +++++\n"
+ set badtests "
+ {}
+ {Duration=5 Nodes=$def_node User=$user_name Flags=ignore_jobs}
+ {StartTime=now Nodes=$def_node User=$user_name Flags=ignore_jobs}
+ {StartTime=midnight Duration=600 User=$user_name Flags=ignore_jobs}
+ {StartTime=now Duration=5 Nodes=ALL Flags=ignore_jobs}
+ {StartTime=now Duration=5 NodeCnt= Nodes= User=$user_name Flags=ignore_jobs}
+ {StartTime=now Duration=5 NodeCnt=1,A}
+ {StartTime=now Duration=5 User=$user_name Flags=ignore_jobs}
+ {StartTime=blah Duration=5 Nodes=$def_node User=$user_name Flags=ignore_jobs}
+ {StartTime=now Duration=foo Nodes=$def_node User=$user_name Flags=ignore_jobs}
+ {StartTime=now Duration=5 Nodes=$def_node User=$user_name PartitionName=badpartname Flags=ignore_jobs}
+ {StartTime=now Duration=5 Nodes=$def_node User=$user_name Flags=badtype,ignore_jobs}
+ {StartTime=now+10minutes EndTime=now Nodes=$def_node User=$user_name Flags=ignore_jobs}
+ {StartTime=now Duration=5 Nodes=$def_node User=$user_name Licenses=DUMMY_FOR_TESTING Flags=ignore_jobs}
+"
+ # {StartTime=now Duration=5 Nodes=$def_node Account=badaccountname}
+
+ foreach test $badtests {
+ set ret_code [create_res $test 1]
+ if {$ret_code == 0} {
+ send_user "\nFAILURE: Reservation $test did not fail but should have\n"
+ delete_res $res_name
+ exit 1
+ } else {
+ send_user "Expected error. You can turn that frown upside-down.\n"
+ }
+ }
+
+ if {[test_super_user] == 0} {
+ send_user "\nWARNING: can not test more unless SlurmUser or root\n"
+ exit $exit_code
+ }
+
+ set goodtests "
+ {StartTime=now Duration=5 Nodes=$def_node User=$user_name Flags=ignore_jobs}
+ {StartTime=now+5minutes EndTime=now+10minutes Nodes=$def_node User=$user_name Flags=ignore_jobs}
+ {StartTime=midnight Duration=600 Nodes=$def_node User=$user_name Flags=ignore_jobs}
+ {StartTime=now Duration=5 Nodes=ALL User=$user_name Flags=ignore_jobs}
+ {StartTime=now Duration=5 NodeCnt=1 User=$user_name Flags=ignore_jobs}
+ {StartTime=now Duration=5 Nodes=$def_node User=$user_name PartitionName=$def_partition Flags=ignore_jobs}
+ {StartTime=now Duration=5 Nodes=$def_node User=$user_name Flags=Maint Flags=ignore_jobs}
+"
+ foreach test $goodtests {
+ set ret_code [create_res $test 0]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+ set ret_code [delete_res $res_name]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to delete a reservation\n"
+ exit $ret_code
+ }
+ }
+}
diff --git a/testsuite/expect/inc3.11.2 b/testsuite/expect/inc3.11.2
new file mode 100644
index 0000000..130a0da
--- /dev/null
+++ b/testsuite/expect/inc3.11.2
@@ -0,0 +1,75 @@
+############################################################################
+# Purpose: Test of SLURM functionality
+# to be called from test3.11
+# Create a reservation and update it in various ways
+#
+############################################################################
+# Copyright (C) 2009 Lawrence Livermore National Security
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Dave Bremer <dbremer@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc3_11_2 {} {
+ global def_node user_name def_partition exit_code res_name
+
+
+ # TEST 2
+
+ send_user "\n+++++ STARTING TEST 2 +++++\n"
+ set ret_code [create_res "StartTime=now+60minutes Duration=60 NodeCnt=1 User=$user_name" 0]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+
+ set goodupdates "
+ {PartitionName=$def_partition}
+ {PartitionName=}
+ {Duration=90}
+ {StartTime=now+30minutes}
+ {EndTime=now+60minutes Flags=Maint NodeCnt=1 Nodes=}
+ {Nodes=$def_node}
+"
+ # {Flags=Maint}
+ # {Flags=}
+
+ if {$user_name != "root"} {
+ lappend goodupdates {Users+=root} {Users-=root}
+ }
+
+ foreach test $goodupdates {
+ set ret_code [update_res $res_name $test 0]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ set exit_code 1
+ break
+ }
+
+ }
+
+ set ret_code [delete_res $res_name]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to delete a reservation\n"
+ exit $ret_code
+ }
+}
diff --git a/testsuite/expect/inc3.11.3 b/testsuite/expect/inc3.11.3
new file mode 100644
index 0000000..35f1c3e
--- /dev/null
+++ b/testsuite/expect/inc3.11.3
@@ -0,0 +1,123 @@
+############################################################################
+# Purpose: Test of SLURM functionality
+# to be called from test3.11
+# Make a reservation, submit a job to it, confirm that the
+# job is paired with the reservation.
+#
+############################################################################
+# Copyright (C) 2009 Lawrence Livermore National Security
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Dave Bremer <dbremer@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc3_11_3 {} {
+ global user_name exit_code res_name
+ global bin_rm file_in bin_sleep sbatch number scontrol
+ global alpha_numeric_under scancel
+
+ # TEST 3
+
+ send_user "\n+++++ STARTING TEST 3 +++++\n"
+ # Make the reservation
+ set ret_code [create_res "StartTime=now+60minutes Duration=60 NodeCnt=1 User=$user_name" 0]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+
+ # Make the job script
+ exec $bin_rm -f $file_in
+ make_bash_script $file_in "$bin_sleep 10"
+
+ # Submit the batch job
+ set sbatch_pid [spawn $sbatch -N1 --reservation=$res_name $file_in]
+ expect {
+ -re "Submitted batch job ($number)" {
+ set job_id $expect_out(1,string)
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sbatch not responding\n"
+ slow_kill $sbatch_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+ if {$job_id == 0} {
+ send_user "\nFAILURE: batch submit failure\n"
+ exit 1
+ }
+
+ # Show the job, make sure reservation tag is right
+ spawn $scontrol show job $job_id
+ expect {
+ -re "Reservation=($alpha_numeric_under)" {
+ set tmp_res_name $expect_out(1,string)
+ if {$tmp_res_name != $res_name} {
+ send_user "\nFAILURE: problem submitting a job to a "
+ send_user "reservation. Job $job_id is running on "
+ send_user "reservation $tmp_res_name, not $res_name\n"
+ set exit_code 1
+ exp_continue
+ }
+ }
+ -re "Invalid job id specified" {
+ send_user "\nFAILURE: Job $job_id not found\n"
+ set exit_code 1
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: scontrol not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ # Cancel the job
+ spawn $scancel -v $job_id
+ expect {
+ -re "Invalid job_id" {
+ send_user "\nFAILURE: Error cancelling the job submitted "
+ send_user "to the reservation. Job $job_id not found\n"
+ set exit_code 1
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: scancel not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ # Delete the reservation
+ set ret_code [delete_res $res_name]
+ if {$ret_code != 0} {
+ exit $ret_code
+ }
+}
diff --git a/testsuite/expect/inc3.11.4 b/testsuite/expect/inc3.11.4
new file mode 100644
index 0000000..9840c33
--- /dev/null
+++ b/testsuite/expect/inc3.11.4
@@ -0,0 +1,93 @@
+############################################################################
+# Purpose: Test of SLURM functionality
+# to be called from test3.11
+# If not running as root, make a reservation restricted to root,
+# submit a job, and confirm that the job is rejected.
+#
+############################################################################
+# Copyright (C) 2009 Lawrence Livermore National Security
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Dave Bremer <dbremer@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc3_11_4 {} {
+ global user_name exit_code res_name
+ global sbatch file_in number
+
+ # TEST 4
+ # If not running as root, make a reservation restricted to root, submit a job,
+ # and confirm that the job is rejected.
+ #
+ send_user "\n+++++ STARTING TEST 4 +++++\n"
+
+ if {$user_name == "root"} {
+ return;
+ }
+
+ # Make the reservation
+ set ret_code [create_res "StartTime=now+60minutes Duration=60 NodeCnt=1 User=root" 0]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a reservation\n"
+ set exit_code 1
+ }
+
+ # Submit the batch job
+ set denied 0
+ set sbatch_pid [spawn $sbatch -N1 --reservation=$res_name $file_in]
+ expect {
+ -re "Submitted batch job ($number)" {
+ set job_id $expect_out(1,string)
+ exec $scancel $job_id
+ send_user "\nFAILURE: job submit should have been denied\n"
+ set exit_code 1
+ exp_continue
+ }
+ -re "Batch job submission failed: Access denied to requested reservation" {
+ # Job was correctly denied
+ set denied 1
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sbatch not responding\n"
+ slow_kill $sbatch_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$denied == 0} {
+ send_user "\nFAILURE: Job $job_id should have been rejected "
+ send_user "from reservation restricted to root. Expected "
+ send_user "rejection message not given.\n"
+ set exit_code 1
+ } else {
+ send_user "Expected error, no worries mate.\n"
+ }
+ # Delete the reservation
+ set ret_code [delete_res $res_name]
+ if {$ret_code != 0} {
+ exit $ret_code
+ }
+}
diff --git a/testsuite/expect/inc3.11.5 b/testsuite/expect/inc3.11.5
new file mode 100644
index 0000000..8f2b88c
--- /dev/null
+++ b/testsuite/expect/inc3.11.5
@@ -0,0 +1,64 @@
+############################################################################
+# Purpose: Test of SLURM functionality
+# to be called from test3.11
+# Test node reservation conflict
+#
+############################################################################
+# Copyright (C) 2009 Lawrence Livermore National Security
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Dave Bremer <dbremer@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc3_11_5 {} {
+ global user_name exit_code res_name
+
+
+ # TEST 5
+ # Test node reservation conflict
+ #
+ send_user "\n+++++ STARTING TEST 5 +++++\n"
+ # Make the reservation
+ set ret_code [create_res "StartTime=now+60minutes Duration=60 Nodes=ALL user=$user_name" 0]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+
+ set res_name_save $res_name
+
+ # Test for node reservation conflict
+ set ret_code [create_res "StartTime=now+60minutes Duration=60 NodeCnt=1 user=$user_name" 1]
+ if {$ret_code == 0} {
+ send_user "\nFAILURE: Reservation $test did not fail but should have\n"
+ delete_res $res_name
+ exit 1
+ } else {
+ send_user "Expected error. You can turn that frown upside-down.\n"
+ }
+
+ # Delete the reservation
+ set ret_code [delete_res $res_name_save]
+ if {$ret_code != 0} {
+ exit $ret_code
+ }
+}
diff --git a/testsuite/expect/inc3.11.6 b/testsuite/expect/inc3.11.6
new file mode 100644
index 0000000..18a861c
--- /dev/null
+++ b/testsuite/expect/inc3.11.6
@@ -0,0 +1,97 @@
+############################################################################
+# Purpose: Test of SLURM functionality
+# to be called from test3.11
+# Test time reservation conflict
+#
+############################################################################
+# Copyright (C) 2009 Lawrence Livermore National Security
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Dave Bremer <dbremer@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+
+proc inc3_11_6 {} {
+ global user_name exit_code res_name file_in bin_rm
+
+
+ # TEST 6
+ # Test time reservation conflict
+ #
+ send_user "\n+++++ STARTING TEST 6 +++++\n"
+ # Make the reservation
+ set ret_code [create_res "StartTime=now+60minutes Duration=60 Nodes=ALL user=$user_name" 0]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+
+ set res_name_save $res_name
+
+ # Test for time reservation conflict (front overlap)
+ set ret_code [create_res "StartTime=now+30minutes Duration=60 Nodes=ALL user=$user_name" 1]
+ if {$ret_code == 0} {
+ send_user "\nFAILURE: Reservation $test did not fail but should have\n"
+ delete_res $res_name
+ exit 1
+ } else {
+ send_user "Expected error. You can turn that frown upside-down.\n"
+ }
+
+ # Delete the reservation
+ set ret_code [delete_res $res_name_save]
+ if {$ret_code != 0} {
+ exit $ret_code
+ }
+
+
+ # Make the reservation
+ set ret_code [create_res "StartTime=now+30minutes Duration=60 Nodes=ALL user=$user_name" 0]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+
+ set res_name_save $res_name
+
+ # Test for time reservation conflict (trail overlap)
+ set ret_code [create_res "StartTime=now+60minutes Duration=60 Nodes=ALL user=$user_name" 1]
+ if {$ret_code == 0} {
+ send_user "\nFAILURE: Reservation $test did not fail but should have\n"
+ delete_res $res_name
+ exit 1
+ } else {
+ send_user "Expected error. You can turn that frown upside-down.\n"
+ }
+ # Delete the reservation
+ set ret_code [delete_res $res_name_save]
+ if {$ret_code != 0} {
+ exit $ret_code
+ }
+
+ #
+ # Remove the temporary job script
+ exec $bin_rm -f $file_in
+
+ if {$exit_code == 0} {
+ send_user "\nSUCCESS\n"
+ }
+}
diff --git a/testsuite/expect/test1.23 b/testsuite/expect/test1.23
index cd3d05b..90998fb 100755
--- a/testsuite/expect/test1.23
+++ b/testsuite/expect/test1.23
@@ -44,6 +44,10 @@
send_user " or sched/wiki2 (Moab) schedulers\n"
exit $exit_code
}
+if {[test_bluegene]} {
+ send_user "\nWARNING: This test is incompatible with Bluegene systems\n"
+ exit $exit_code
+}
#
# Submit a job with invalid mincpus requirement
diff --git a/testsuite/expect/test1.29 b/testsuite/expect/test1.29
index 35075a7..a729ed5 100755
--- a/testsuite/expect/test1.29
+++ b/testsuite/expect/test1.29
@@ -220,6 +220,7 @@
if {$matches != 5} {
send_user "\nFAILURE: User limits not propagated got $matches matches\n"
+ send_user "Check PropagateResourceLimits configuration parameter\n"
send_user "Check $file_err for errors\n"
send_user "A long running slurmd could cause a file size limit error\n"
set exit_code 1
diff --git a/testsuite/expect/test1.52 b/testsuite/expect/test1.52
index f080900..6013a9c 100755
--- a/testsuite/expect/test1.52
+++ b/testsuite/expect/test1.52
@@ -59,7 +59,6 @@
exit 1
}
-re "MaxNodes=($number)" {
- send_user "here 1\n"
set max_nodes $expect_out(1,string)
exp_continue
}
diff --git a/testsuite/expect/test1.59 b/testsuite/expect/test1.59
index fdc12fc..e3c0d44 100755
--- a/testsuite/expect/test1.59
+++ b/testsuite/expect/test1.59
@@ -56,29 +56,15 @@
}
# find out if we have enough nodes to test functionality
set partition [default_partition]
-spawn $scontrol show partition $partition
-expect {
- -re "not found" {
- send_user "\nFAILURE: partition $partition doesn't exist\n"
- exit 1
- }
- -re "TotalNodes=($number)" {
- set node_count $expect_out(1,string)
- if { $node_count < 3 } {
- send_user "WARNING: system must have at least 3 \
- nodes to run this test on. This system \
- only has $node_count.\n"
- exit $exit_code
- }
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: scontrol not responding\n"
- exit 1
- }
- eof {
- }
+
+set node_count [get_node_cnt_in_part $partition]
+if { $node_count < 3 } {
+ send_user "WARNING: system must have at least 3 \
+ nodes to run this test on. This system \
+ only has $node_count.\n"
+ exit $exit_code
}
+
set node0 0
set node1 0
set node2 0
diff --git a/testsuite/expect/test1.62 b/testsuite/expect/test1.62
index 0724597..7d8483b 100755
--- a/testsuite/expect/test1.62
+++ b/testsuite/expect/test1.62
@@ -71,6 +71,10 @@
incr devices
exp_continue
}
+ -re "CUDA_VISIBLE_DEVICES=NoDevFiles" {
+ send_user "\nWARNING: This could indicate that gres.conf lacks device files for the GPUs\n"
+ exp_continue
+ }
timeout {
send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid
diff --git a/testsuite/expect/test1.87 b/testsuite/expect/test1.87
index 457ff29..0a63c10 100755
--- a/testsuite/expect/test1.87
+++ b/testsuite/expect/test1.87
@@ -76,6 +76,11 @@
slow_kill $srun_pid
exit 0
}
+ -re "Required node not available" {
+ send_user "\nWARNING: can't test srun task distribution\n"
+ exec $bin_rm -f $file_in
+ exit $exit_code
+ }
-re "Unable to contact" {
send_user "\nFAILURE: slurm appears to be down\n"
exit 1
diff --git a/testsuite/expect/test1.88 b/testsuite/expect/test1.88
index 5e25ce8..08bcaa1 100755
--- a/testsuite/expect/test1.88
+++ b/testsuite/expect/test1.88
@@ -69,7 +69,7 @@
exec $bin_rm -f $test_prog ${test_prog}.o
if {$use_pmi} {
set pmi_link "-rpath $slurm_dir/lib -L $slurm_dir/lib -lpmi"
- exec $mpicc -Xlinker $pmi_link -o $test_prog ${test_prog}.c
+ eval exec $mpicc -Xlinker $pmi_link -o $test_prog ${test_prog}.c
} else {
exec $mpicc -o $test_prog ${test_prog}.c
}
@@ -154,13 +154,17 @@
#
# Check for desired output in stdout
#
-set expected [expr 6 + 6 + 4 + 4]
+set expected_msg [expr 6 + 6 + 4 + 4]
+set expected_sum [expr 30 + 30 + 12 + 12]
if {[wait_for_file $file_out] == 0} {
- set matches 0
set complete 0
+ set matches 0
+ set rank_sum 0
spawn $bin_cat $file_out
expect {
- -re "just received msg from Rank" {
+ -re "Rank.($number). on $alpha_numeric_under just received msg from Rank ($number)" {
+ incr rank_sum $expect_out(1,string)
+ incr rank_sum $expect_out(2,string)
incr matches
exp_continue
}
@@ -178,12 +182,15 @@
send_user "with the configured switch\n"
send_user " Core files may be present from failed MPI tasks\n\n"
set exit_code 1
- } elseif {$matches != $expected} {
- send_user "\nFAILURE: unexpected output ($matches of $expected)\n"
+ } elseif {$matches != $expected_msg} {
+ send_user "\nFAILURE: unexpected output ($matches of $expected_msg)\n"
set exit_code 1
} elseif {$complete == 0} {
send_user "\nFAILURE: test failed to complete\n"
set exit_code 1
+ } elseif {$rank_sum != $expected_sum} {
+ send_user "\nFAILURE: Invalid rank values ($rank_sum != $expected_sum)\n"
+ set exit_code 1
}
} else {
set exit_code 1
diff --git a/testsuite/expect/test1.89 b/testsuite/expect/test1.89
index d8d8ae8..3cb4b7c 100755
--- a/testsuite/expect/test1.89
+++ b/testsuite/expect/test1.89
@@ -36,6 +36,7 @@
set test_id "1.89"
set exit_code 0
set file_prog "test$test_id.prog"
+set prompt "PROMPT:"
print_header $test_id
@@ -90,11 +91,28 @@
# Create an allocation
#
set salloc_pid [spawn $salloc -N1 --exclusive --verbose -t2 $bin_bash]
+expect {
+ -re "Granted job allocation ($number)" {
+ set job_id $expect_out(1,string)
+ send "export PS1=\"$prompt\"\r"
+ exp_continue
+ }
+ -re "export PS1=\"$prompt\"" {
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: salloc not responding "
+ send_user "or failure to recognize prompt\n"
+ slow_kill $salloc_pid
+ exit 1
+ }
+ -re $prompt {
+ }
+}
#
# Run a job step to get allocated processor count and affinity
#
-expect -re $prompt
set mask 0
set task_cnt 0
send "$srun -c1 $file_prog\r"
diff --git a/testsuite/expect/test1.90 b/testsuite/expect/test1.90
index ca76902..7e6182c 100755
--- a/testsuite/expect/test1.90
+++ b/testsuite/expect/test1.90
@@ -36,6 +36,7 @@
set test_id "1.90"
set exit_code 0
set file_prog "test$test_id.prog"
+set prompt "PROMPT:"
print_header $test_id
@@ -109,11 +110,28 @@
# Create an allocation
#
set salloc_pid [spawn $salloc -N1 --exclusive --verbose -t2 $bin_bash]
+expect {
+ -re "Granted job allocation ($number)" {
+ set job_id $expect_out(1,string)
+ send "export PS1=\"$prompt\"\r"
+ exp_continue
+ }
+ -re "export PS1=\"$prompt\"" {
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: salloc not responding "
+ send_user "or failure to recognize prompt\n"
+ slow_kill $salloc_pid
+ exit 1
+ }
+ -re $prompt {
+ }
+}
#
# Run a job step to get allocated processor count and affinity
#
-expect -re $prompt
set full_mask -1
set timeout $max_job_delay
send "$srun -c1 $file_prog\r"
diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91
index ae45ca0..8ff3332 100755
--- a/testsuite/expect/test1.91
+++ b/testsuite/expect/test1.91
@@ -37,6 +37,7 @@
set test_id "1.91"
set exit_code 0
set file_prog "test$test_id.prog"
+set prompt "PROMPT:"
print_header $test_id
@@ -146,12 +147,29 @@
global env
set env(SLURM_CPU_BIND) "verbose"
set salloc_pid [spawn $salloc -w $node_name -N1 --exclusive --verbose -t2 $bin_bash]
+expect {
+ -re "Granted job allocation ($number)" {
+ set job_id $expect_out(1,string)
+ send "export PS1=\"$prompt\"\r"
+ exp_continue
+ }
+ -re "export PS1=\"$prompt\"" {
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: salloc not responding "
+ send_user "or failure to recognize prompt\n"
+ slow_kill $salloc_pid
+ exit 1
+ }
+ -re $prompt {
+ }
+}
#############################################################################
#
# Run a job step to get allocated processor count and affinity
#
-expect -re $prompt
set mask 0
set task_cnt 0
send "$srun -c1 $file_prog\r"
diff --git a/testsuite/expect/test1.94 b/testsuite/expect/test1.94
new file mode 100755
index 0000000..d857451
--- /dev/null
+++ b/testsuite/expect/test1.94
@@ -0,0 +1,215 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Test of MPICH2 task spawn logic
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "WARNING: ..." with an explanation of why the test can't be made, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2002 The Regents of the University of California.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Morris Jette <jette1@llnl.gov>
+# CODE-OCEC-09-009. All rights reserved.
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "1.94"
+set exit_code 0
+set file_in "test$test_id.input"
+set file_out "test$test_id.output"
+set file_err "test$test_id.error"
+set master_prog "test$test_id.master"
+set slave_prog "test$test_id.slave"
+set job_id 0
+
+print_header $test_id
+
+#
+# Test for existence of mpi compiler
+#
+if {[info exists mpicc] == 0} {
+ send_user "\nWARNING: mpicc not defined, can't perform mpi testing\n"
+ exit 0
+}
+if {[file executable $mpicc] == 0} {
+ send_user "\nWARNING: $mpicc does not exists\n"
+ exit 0
+}
+#
+# Test only works with mpi/pmi2 plugin.
+#
+set invalid 1
+log_user 0
+spawn $scontrol show config
+expect {
+ -re "pmi2" {
+ set invalid 0
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: scontrol not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+log_user 1
+if {$invalid == 1} {
+ send_user "\nWARNING: test compatible only with mpi/pmi2\n"
+ exit $exit_code
+}
+
+#
+# Delete left-over program and rebuild it
+#
+exec $bin_rm -f $file_in $file_out $file_err
+exec $bin_rm -f $master_prog ${master_prog}.o
+exec $bin_rm -f $slave_prog ${slave_prog}.o
+exec $mpicc -o $master_prog ${master_prog}.c
+exec $mpicc -o $slave_prog ${slave_prog}.c
+
+# Delete left-over stdout/err files
+file delete $file_out $file_err
+
+#
+# Build input script file
+#
+make_bash_script $file_in "
+ $srun -n1 $master_prog $slave_prog
+"
+
+#
+# Spawn an sbatch job that uses stdout/err and confirm their contents
+#
+set timeout $max_job_delay
+set no_start 0
+set sbatch_pid [spawn $sbatch -n4 --output=$file_out --error=$file_err -t1 $file_in]
+expect {
+ -re "Submitted batch job ($number)" {
+ set job_id $expect_out(1,string)
+ exp_continue
+ }
+ -re "Batch job submission failed" {
+ set no_start 1
+ exp_continue
+ }
+ -re "Unable to contact" {
+ send_user "\nFAILURE: slurm appears to be down\n"
+ exit 1
+ }
+ timeout {
+ send_user "\nFAILURE: srun not responding\n"
+ slow_kill $sbatch_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+
+if {$no_start != 0} {
+ send_user "\nWARNING: partition too small for test\n"
+ if {$job_id != 0} {
+ cancel_job $job_id
+ }
+ exit 0
+}
+if {$job_id == 0} {
+ send_user "\nFAILURE: batch submit failure\n"
+ exit 1
+}
+
+#
+# Wait for job to complete
+#
+if {[wait_for_job $job_id "DONE"] != 0} {
+ send_user "\nFAILURE: waiting for job to complete\n"
+ set exit_code 1
+}
+
+#
+# Check for desired output in stdout
+#
+set expected_msg 4
+set expected_sum 12
+if {[wait_for_file $file_out] == 0} {
+ set complete 0
+ set matches 0
+ set rank_sum 0
+ spawn $bin_cat $file_out
+ expect {
+ -re "Rank.($number). on $alpha_numeric_under just received msg from Rank ($number)" {
+ incr rank_sum $expect_out(1,string)
+ incr rank_sum $expect_out(2,string)
+ incr matches
+ exp_continue
+ }
+ eof {
+ wait
+ }
+ }
+ if {$matches == 0} {
+ send_user "\nFAILURE: No MPI communications occurred\n"
+ send_user " The version of MPI you are using may be incompatible "
+ send_user "with the configured switch\n"
+ send_user " Core files may be present from failed MPI tasks\n\n"
+ set exit_code 1
+ } elseif {$matches != $expected_msg} {
+ send_user "\nFAILURE: unexpected output ($matches of $expected_msg)\n"
+ set exit_code 1
+ } elseif {$rank_sum != $expected_sum} {
+ send_user "\nFAILURE: Invalid rank values ($rank_sum != $expected_sum)\n"
+ set exit_code 1
+ }
+} else {
+ set exit_code 1
+}
+
+if {$exit_code == 0} {
+ exec $bin_rm -f $file_in $file_out $file_err
+ exec $bin_rm -f $master_prog ${master_prog}.o
+ exec $bin_rm -f $slave_prog ${slave_prog}.o
+ send_user "\nSUCCESS\n"
+} else {
+ set matches 0
+ spawn head $file_err
+ expect {
+ -re "Error creating CQ" {
+ incr matches
+ exp_continue
+ }
+ eof {
+ wait
+ }
+ }
+ if {$matches != 0} {
+ send_user "WARNING: If using MVAPICH then\n"
+ send_user " Configure \"PropagateResourceLimitsExcept=MEMLOCK\"\n"
+ send_user " Also start slurmd with \"ulimit -l unlimited\"\n"
+ } else {
+ send_user "Check contents of $file_err\n"
+ }
+}
+
+exit $exit_code
diff --git a/testsuite/expect/test1.94.master.c b/testsuite/expect/test1.94.master.c
new file mode 100644
index 0000000..1d2a466
--- /dev/null
+++ b/testsuite/expect/test1.94.master.c
@@ -0,0 +1,69 @@
+/*****************************************************************************\
+ * test1.94.master.c - Test of MPICH2 task spawn logic
+ *****************************************************************************
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[])
+{
+ int world_size, universe_size = 0, *universe_sizep, rank, flag, rc;
+ MPI_Comm everyone; /* intercommunicator */
+
+ if (argc < 2) {
+ printf("FAILURE: Usage %s <slave_program>\n", argv[0]);
+ exit(1);
+ }
+
+ MPI_Init(&argc, &argv);
+ MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+ if (world_size != 1) {
+ printf("FAILURE: Started %d master processes\n", world_size);
+ exit(1);
+ }
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ //printf("master rank:%d\n",rank);
+
+ /* NOTE: Ideally MPI_UNIVERSE_SIZE would be the size of the job
+ * allocation. Presently it is the size of the job step allocation.
+ * In any case, additional tasks can be spawned */
+ MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_UNIVERSE_SIZE,
+ &universe_sizep, &flag);
+ if (flag) {
+ universe_size = *universe_sizep;
+ //printf("MPI_UNIVERSE_SIZE is %d\n", universe_size);
+ }
+ if (universe_size < 2)
+ universe_size = 5;
+
+ rc = MPI_Comm_spawn(argv[1], MPI_ARGV_NULL, universe_size-1,
+ MPI_INFO_NULL, 0, MPI_COMM_SELF, &everyone,
+ MPI_ERRCODES_IGNORE);
+ if (rc != MPI_SUCCESS) {
+ printf("FAILURE: MPI_Comm_spawn(): %d\n", rc);
+ exit(1);
+ }
+
+ MPI_Finalize();
+ exit(0);
+}
diff --git a/testsuite/expect/test1.94.slave.c b/testsuite/expect/test1.94.slave.c
new file mode 100644
index 0000000..9fd9841
--- /dev/null
+++ b/testsuite/expect/test1.94.slave.c
@@ -0,0 +1,79 @@
+/*****************************************************************************\
+ * test1.94.slave.c - Simple ping test of operation with SLURM.
+ *****************************************************************************
+ * Copyright (C) 2004 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Dong Ahn <dahn@llnl.gov>
+ * CODE-OCEC-09-009. All rights reserved.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.schedmd.com/slurmdocs/>.
+ * Please also read the included file: DISCLAIMER.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/utsname.h>
+
+#define COMM_TAG 1000
+
+static void pass_its_neighbor(const int rank, const int size, const int* buf)
+{
+ struct utsname uts;
+ MPI_Request request[2];
+ MPI_Status status[2];
+
+ MPI_Irecv((void *)buf, 1, MPI_INT, ((rank+size-1)%size), COMM_TAG,
+ MPI_COMM_WORLD, &request[0]);
+ MPI_Isend((void *)&rank, 1, MPI_INT, ((rank+1)%size), COMM_TAG,
+ MPI_COMM_WORLD, &request[1]);
+ MPI_Waitall(2, request, status);
+
+ uname(&uts);
+ fprintf(stdout, "Rank[%d] on %s just received msg from Rank %d\n",
+ rank, uts.nodename, *buf);
+}
+
+int main(int argc, char *argv[])
+{
+ int buf, size, rank, rc = 0;
+ MPI_Comm parent;
+
+ MPI_Init(&argc, &argv);
+ MPI_Comm_get_parent(&parent);
+ if (parent == MPI_COMM_NULL) {
+ printf("No parent!\n");
+ rc = 1;
+ goto fini;
+ }
+ MPI_Comm_remote_size(parent, &size);
+ if (size != 1) {
+ printf("Something's wrong with the parent\n");
+ rc = 2;
+ goto fini;
+ }
+ MPI_Comm_size(MPI_COMM_WORLD, &size);
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ //printf("slave rank:%d size:%d\n", rank, size);
+
+ buf = rank; /* we only pass rank */
+ pass_its_neighbor(rank, size, &buf);
+
+fini: MPI_Finalize();
+ exit(rc);
+}
diff --git a/testsuite/expect/test10.13 b/testsuite/expect/test10.13
index c8081d6..d886edd 100755
--- a/testsuite/expect/test10.13
+++ b/testsuite/expect/test10.13
@@ -186,7 +186,7 @@
incr matches
exp_continue
}
- -re "BPs" {
+ -re "BPs|MPs" {
set stuff [concat $stuff "11"]
incr matches
exp_continue
diff --git a/testsuite/expect/test10.3 b/testsuite/expect/test10.3
index 7efb50d..3d56987 100755
--- a/testsuite/expect/test10.3
+++ b/testsuite/expect/test10.3
@@ -55,7 +55,7 @@
set too_small 1
exp_continue
}
- -re "ID" {
+ -re "ID " {
incr matches
exp_continue
}
@@ -80,7 +80,7 @@
send "q"
exp_continue
}
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
incr matches
send "q"
exp_continue
@@ -100,7 +100,7 @@
exit 0
}
if {$matches != 6} {
- send_user "\nFAILURE: smap slurm partition display in curses mode\n"
+ send_user "\nFAILURE: smap slurm partition display in curses mode ($matches of 6)\n"
set exit_code 1
}
diff --git a/testsuite/expect/test10.4 b/testsuite/expect/test10.4
index 9e77a39..08c72ff 100755
--- a/testsuite/expect/test10.4
+++ b/testsuite/expect/test10.4
@@ -70,7 +70,7 @@
incr matches
exp_continue
}
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
incr matches
exp_continue
}
diff --git a/testsuite/expect/test10.5 b/testsuite/expect/test10.5
index 5f47939..c0adfb6 100755
--- a/testsuite/expect/test10.5
+++ b/testsuite/expect/test10.5
@@ -76,7 +76,7 @@
incr matches
exp_continue
}
- -re "USER" {
+ -re "JOBID|USER" {
set stuff [concat $stuff "4"]
incr matches
exp_continue
@@ -97,7 +97,7 @@
send "q\n"
exp_continue
}
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
set stuff [concat $stuff "7b"]
incr matches
send "q\n"
diff --git a/testsuite/expect/test10.6 b/testsuite/expect/test10.6
index 38a0493..3cdc7fe 100755
--- a/testsuite/expect/test10.6
+++ b/testsuite/expect/test10.6
@@ -36,7 +36,7 @@
set test_id "10.6"
set exit_code 0
set matches 0
-set non_bg 0
+set non_bg 0
print_header $test_id
@@ -67,10 +67,6 @@
incr matches
exp_continue
}
- -re "USER" {
- incr matches
- exp_continue
- }
-re "CONN" {
incr matches
exp_continue
@@ -79,11 +75,7 @@
incr matches
exp_continue
}
- -re "NODELIST" {
- incr matches
- exp_continue
- }
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
incr matches
exp_continue
}
@@ -110,8 +102,8 @@
send_user "\nWARNING: Must be on a BG SYSTEM to run this command\n"
exit 0
}
-if {$matches != 7} {
- send_user "\nFAILURE: smap bg partition display in commandline mode\n"
+if {$matches != 6} {
+ send_user "\nFAILURE: smap bg partition display in commandline mode $matches\n"
set exit_code 1
}
diff --git a/testsuite/expect/test10.8 b/testsuite/expect/test10.8
index a384e9e..6cc26eb 100755
--- a/testsuite/expect/test10.8
+++ b/testsuite/expect/test10.8
@@ -91,7 +91,7 @@
incr matches
exp_continue
}
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
set stuff [concat $stuff "10"]
incr matches
exp_continue
diff --git a/testsuite/expect/test11.5 b/testsuite/expect/test11.5
index f6a249c..019a062 100755
--- a/testsuite/expect/test11.5
+++ b/testsuite/expect/test11.5
@@ -81,7 +81,7 @@
}
#
-# Wait for step to begin execution
+# Wait for step to begin execution
#
if {[wait_for_step $job_id.0] != 0} {
send_user "\nFAILURE: waiting for step $job_id.0 to run\n"
@@ -90,7 +90,7 @@
}
#
-# Enable/disable/test ability to heckpoint
+# Enable/disable/test ability to checkpoint
#
spawn $scontrol check disable $job_id.0
expect {
diff --git a/testsuite/expect/test11.7 b/testsuite/expect/test11.7
index f48c9a2..d3e1160 100755
--- a/testsuite/expect/test11.7
+++ b/testsuite/expect/test11.7
@@ -59,25 +59,14 @@
}
#find out if we have enough nodes to test functionality
-spawn $scontrol show partition $partition
-expect {
- -re "TotalNodes=($number)" {
- set node_count $expect_out(1,string)
- if { $node_count < 2 } {
- send_user "WARNING: system must have at least 2 \
- nodes to run this test on. This system \
- only has 2.\n"
- exit $exit_code
- }
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: scontrol not responding\n"
- exit 1
- }
- eof {
- }
+set node_count [get_node_cnt_in_part $partition]
+if { $node_count < 2 } {
+ send_user "WARNING: system must have at least 2 \
+ nodes to run this test on. This system \
+ only has $node_count.\n"
+ exit $exit_code
}
+
set node0 0
set node1 0
diff --git a/testsuite/expect/test12.2 b/testsuite/expect/test12.2
index 32f7219..36cc621 100755
--- a/testsuite/expect/test12.2
+++ b/testsuite/expect/test12.2
@@ -116,7 +116,7 @@
# Compute error in KB
set diff_mem [expr $mem_used - $mem_size]
set error_mem [expr abs($diff_mem)]
- if {$error_mem > 4100} {
+ if {$error_mem > 4200} {
send_user "\nFAILURE: sstat memory use discrepancy of $error_mem KB\n"
send_user " Wanted $mem_size KB, got $mem_used KB\n"
return 1
@@ -182,9 +182,14 @@
# Run a simple job
# Usage: test12.2.prog <exit_code> <sleep_secs> <mem_kb>
#
+set config_prob 0
set timeout [expr $max_job_delay + $sleep_time]
set sbatch_pid [spawn $sbatch --mem-per-cpu=1024 --output=$file_out --error=$file_err -t2 $file_in]
expect {
+ -re "Requested node configuration is not available" {
+ set config_prob 1
+ exp_continue
+ }
-re "Submitted batch job ($number)" {
set job_id $expect_out(1,string)
exp_continue
@@ -198,6 +203,10 @@
wait
}
}
+if {$config_prob != 0} {
+ send_user "\nWARNING: Unable to test with current node configuration\n"
+ exit 0
+}
if {$job_id == 0} {
send_user "\nFAILURE: batch submit failure\n"
exit 1
diff --git a/testsuite/expect/test12.3 b/testsuite/expect/test12.3
new file mode 100755
index 0000000..b71db53
--- /dev/null
+++ b/testsuite/expect/test12.3
@@ -0,0 +1,285 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Test sacct filtering of records by account and job name.
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2012 SchedMD LLC.
+# Copyright (C) 2008-2010 Lawrence Livermore National Security.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Joseph Donaghy <donaghy1@llnl.gov> and
+# Nathan Yee <nyee32@schedmd.com>
+# CODE-OCEC-09-009. All rights reserved.
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+source ./globals_accounting
+source ./inc12.3.1
+source ./inc12.3.2
+
+set test_id "12.3"
+set exit_code 0
+set job_name_1 "JOB1"
+set job_name_2 "JOB2"
+set job_name_3 "JOB3"
+set file_in "test.$test_id.input"
+set test_acct "slurm_test-account.1"
+
+print_header $test_id
+
+#
+# Checks if test is running on blue gene.
+#
+if {[test_bluegene]} {
+ send_user "\nWARNING: This test can't be run on a blue gene\n"
+ exit $exit_code
+} elseif {[test_serial]} {
+ send_user "\nWARNING: This test is incompatible with serial systems\n"
+ exit $exit_code
+}
+
+#
+# Check accounting config and bail if not found.
+#
+if { [test_account_storage] == 0 } {
+ send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n"
+ exit $exit_code
+}
+
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+ send_user "\nWARNING: This test can't be run without being an Accounting administrator.\n"
+ send_user "Use sacctmgr mod user \$USER_NAME set admin=admin.\n"
+ exit $exit_code
+}
+
+#
+# Make sure we have enough nodes to test functionality
+#
+set partition [default_partition]
+set node_count [get_node_cnt_in_part $partition]
+if { $node_count < 3 } {
+ send_user "WARNING: system must have at least 3 \
+ nodes to run this test on. This system \
+ only has $node_count.\n"
+ exit $exit_code
+}
+
+#
+# Identify the user and his current default account
+#
+set acct_name ""
+set user_name ""
+spawn $bin_id -u -n
+expect {
+ -re "($alpha_numeric_under)" {
+ set user_name $expect_out(1,string)
+ exp_continue
+ }
+ eof {
+ wait
+ }
+}
+set s_pid [spawn $sacctmgr show user $user_name]
+expect {
+ -re "$user_name *($alpha_numeric_under)" {
+ set acct_name $expect_out(1,string)
+ exp_continue
+ }
+ timeout {
+ send_user "FAILURE: sacctmgr add not responding\n"
+ slow_kill $s_pid
+ exit 1
+ }
+ eof {
+ wait
+ }
+}
+
+#
+# Use sacctmgr to add a test account
+#
+set aamatches 0
+set sadd_pid [spawn $sacctmgr -i add account $test_acct]
+expect {
+ -re "Adding Account" {
+ incr aamatches
+ exp_continue
+ }
+ -re "Nothing new added" {
+ send_user "\nWARNING: vestigial account $test_acct found\n"
+ incr aamatches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr add not responding\n"
+ slow_kill $sadd_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+if {$aamatches != 1} {
+ send_user "\nFAILURE: sacctmgr had a problem adding account.\n"
+ exit 1
+}
+
+#
+# Add self to this new account
+#
+set sadd_pid [spawn $sacctmgr -i create user name=$user_name account=$test_acct]
+expect {
+ timeout {
+ send_user "\nFAILURE: sacctmgr add not responding\n"
+ slow_kill $sadd_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+
+#
+# Submit two slurm jobs to capture job info
+#
+set job_id_1 0
+set job_1_node_inx 0
+set timeout $max_job_delay
+set srun_pid [spawn $srun -N2 -t1 -A $test_acct -J $job_name_1 -v $bin_id]
+expect {
+ -re "launching ($number).0 on host ($alpha_numeric_under)," {
+ set job_id_1 $expect_out(1,string)
+ set job_1_node($job_1_node_inx) $expect_out(2,string)
+ incr job_1_node_inx
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: srun not responding\n"
+ slow_kill $srun_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+if {$job_1_node_inx != 2} {
+ send_user "\nFAILURE: srun host count bad ($job_1_node_inx != 2)\n"
+ exit 1
+}
+
+#
+# Second job explicitly excludes node zero from previous job
+#
+set job_id_2 0
+set job_2_node_inx 0
+set srun_pid [spawn $srun -N2 -x $job_1_node(0) -t1 -A $test_acct -J $job_name_2 -v $bin_id]
+expect {
+
+ -re "launching ($number).0 on host ($alpha_numeric_under)," {
+ set job_id_2 $expect_out(1,string)
+ set job_2_node($job_2_node_inx) $expect_out(2,string)
+ incr job_2_node_inx
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: srun not responding\n"
+ slow_kill $srun_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+if {$job_2_node_inx != 2} {
+ send_user "\nFAILURE: srun host count bad ($job_2_node_inx != 2)\n"
+ exit 1
+}
+
+#
+# Third job explicitly excludes node 1 from previous job
+#
+set job_id_3 0
+set job_3_node_inx 0
+set srun_pid [spawn $srun -w$job_1_node(0),$job_2_node(1) -t1 -A $test_acct -J $job_name_3 -v $bin_id]
+expect {
+
+ -re "launching ($number).0 on host ($alpha_numeric_under)," {
+ set job_id_3 $expect_out(1,string)
+ set job_3_node($job_3_node_inx) $expect_out(2,string)
+ incr job_3_node_inx
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: srun not responding\n"
+ slow_kill $srun_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+if {$job_3_node_inx != 2} {
+ send_user "\nFAILURE: srun host count bad ($job_3_node_inx != 2)\n"
+ exit 1
+}
+
+#
+# Wait for accounting data to be propagated to slurmdbd
+#
+sleep 5
+
+#
+# Execute sub-tests based upon these jobs
+#
+inc12_3_1 $job_id_1 $job_id_2 $job_name_1 $job_name_2 $test_acct
+
+inc12_3_2 $job_id_1 $job_id_2 $job_id_3 $job_name_1 $job_name_2 $test_acct $job_1_node(0) $job_1_node(1) $job_2_node(0) $job_2_node(1)
+
+#
+# Use sacctmgr to delete the test account
+#
+set damatches 0
+set sadel_pid [spawn $sacctmgr -i delete account $test_acct]
+expect {
+ -re "Deleting account" {
+ incr damatches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sacctmgr delete not responding\n"
+ slow_kill $sadel_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+if {$damatches != 1} {
+ send_user "\nFAILURE: sacctmgr had a problem deleting account\n"
+ set exit_code 1
+}
+
+if {$exit_code == 0} {
+ print_success $test_id
+}
+exit $exit_code
diff --git a/testsuite/expect/test12.4 b/testsuite/expect/test12.4
index 800c82c..da865f9 100755
--- a/testsuite/expect/test12.4
+++ b/testsuite/expect/test12.4
@@ -34,7 +34,6 @@
set test_id "12.4"
set exit_code 0
-set file_in "test.$test_id.input"
set test_acct "test_acct"
set timeout 60
print_header $test_id
@@ -143,9 +142,6 @@
}
}
-
-make_bash_script $file_in "$bin_id"
-
#
# Spawn a job via srun using this account
#
@@ -693,7 +689,6 @@
}
if {$exit_code == 0} {
- exec $bin_rm -f $file_in
send_user "\nSUCCESS\n"
}
exit $exit_code
diff --git a/testsuite/expect/test13.1 b/testsuite/expect/test13.1
index e57fd5b..86c7eb0 100755
--- a/testsuite/expect/test13.1
+++ b/testsuite/expect/test13.1
@@ -37,6 +37,11 @@
print_header $test_id
+if {[test_bluegene]} {
+ send_user "\nWARNING: This test is incompatible with Bluegene systems\n"
+ exit $exit_code
+}
+
#
# Report the slurm network and use it to establish window parameters
#
diff --git a/testsuite/expect/test15.19 b/testsuite/expect/test15.19
index b7500ca..0c5613f 100755
--- a/testsuite/expect/test15.19
+++ b/testsuite/expect/test15.19
@@ -43,6 +43,9 @@
if {[test_cray]} {
send_user "\nWARNING: This test is incompatible with Cray systems\n"
exit $exit_code
+} elseif {[test_serial]} {
+ send_user "\nWARNING: This test is incompatible with serial systems\n"
+ exit $exit_code
}
#
diff --git a/testsuite/expect/test15.5 b/testsuite/expect/test15.5
index 0a1cfa2..3bea6a9 100755
--- a/testsuite/expect/test15.5
+++ b/testsuite/expect/test15.5
@@ -44,6 +44,11 @@
print_header $test_id
+if {[test_cray]} {
+ send_user "\nWARNING: This test is incompatible with Cray systems\n"
+ exit $exit_code
+}
+
set inactive_limit $sleep_time
set kill_wait $sleep_time
set over_time_limit 0
diff --git a/testsuite/expect/test17.25 b/testsuite/expect/test17.25
index 15eb701..093059c 100755
--- a/testsuite/expect/test17.25
+++ b/testsuite/expect/test17.25
@@ -101,10 +101,10 @@
send_user "\nFAILURE: processed $matches of 3 environment variables\n"
set exit_code 1
}
+cancel_job $job_id
if {$exit_code == 0} {
exec $bin_rm -f $file_in
send_user "\nSUCCESS\n"
}
-cancel_job $job_id
exit $exit_code
diff --git a/testsuite/expect/test21.21 b/testsuite/expect/test21.21
index b298fef..2290611 100755
--- a/testsuite/expect/test21.21
+++ b/testsuite/expect/test21.21
@@ -41,6 +41,11 @@
set timeout 60
print_header $test_id
+if {[test_serial]} {
+ send_user "\nWARNING: This test is not compatible with serial systems\n"
+ exit 0
+}
+
proc _test_limits { } {
global file_in srun sbatch squeue scancel bin_id number bin_sleep bin_rm ta
# test maxcpumin maxcpu maxjob maxnode maxsubmit maxwall
@@ -90,7 +95,7 @@
exp_continue
}
-re "launching ($number)" {
- send_user "\nFAILURE: job should not have run.\n"
+ send_user "\nFAILURE: job should not have run 1.\n"
set exit_code 1
exp_continue
}
@@ -127,7 +132,7 @@
}
if { $matches != 1 } {
- send_user "\nFAILURE: job dion't launch with correct limit\n"
+ send_user "\nFAILURE: job didn't launch with correct limit\n"
set exit_code 1
return $exit_code
}
@@ -141,7 +146,7 @@
exp_continue
}
-re "launching ($number)" {
- send_user "\nFAILURE: job should not have run.\n"
+ send_user "\nFAILURE: job should not have run 2.\n"
set exit_code 1
exp_continue
}
@@ -211,7 +216,7 @@
exp_continue
}
-re "Submitted batch job ($number)" {
- send_user "\nFAILURE: job should not have run.\n"
+ send_user "\nFAILURE: job should not have run 3.\n"
set exit_code 1
exp_continue
}
@@ -491,10 +496,11 @@
sleep 3
}
-
#
# Use sacctmgr to delete the test account
#
+# First wait a few seconds for the job to complete
+sleep 2
set damatches 0
set sadel_pid [spawn $sacctmgr -i delete account $ta]
expect {
diff --git a/testsuite/expect/test22.1 b/testsuite/expect/test22.1
index 58d327f..6ecf033 100755
--- a/testsuite/expect/test22.1
+++ b/testsuite/expect/test22.1
@@ -30,8 +30,14 @@
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
+source ./globals
source ./globals_accounting
+source ./inc22.1.1
+source ./inc22.1.2
+source ./inc22.1.3
+source ./inc22.1.4
+
set test_id "test22.1"
set test_nu "test22-1"
set exit_code 0
@@ -51,6 +57,7 @@
set users [format "%s,%s" $user1 $user2]
set node0 [format "%s%d" $cluster 0]
set node1 [format "%s%d" $cluster 1]
+set node_list [format "%s%s" $cluster "\[0-1\]"]
set node0_cpus 2
set node1_cpus 2
set cluster_cpus [expr $node0_cpus + $node1_cpus]
@@ -140,6 +147,11 @@
exit 0
}
+set wc_key_track [test_track_wckey]
+if { $wc_key_track == 0 } {
+ send_user "\nWARNING: Portions of this test will not work without TrackWCKey\n"
+}
+
if { [string compare [check_accounting_admin_level] "Administrator"] } {
send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
exit 0
@@ -190,7 +202,7 @@
set my_pid [eval spawn $sacctmgr -n -p list assoc users=$users account=$accounts cluster=$cluster format="User,account,id"]
expect {
-re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
incr exit_code 1
}
-re "$user1.$account1.($number)." {
@@ -242,7 +254,7 @@
set my_pid [eval spawn $sacctmgr -n -p list wckeys users=$users wckeys=$wckey1 cluster=$cluster format="user,wckey,id"]
expect {
-re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
incr exit_code 1
}
-re "$user1.$wckey1.($number)." {
@@ -275,16 +287,19 @@
exec $bin_rm -f $sql_in
set file [open $sql_in "w"]
+
+
# DON'T MESS WITH THIS UNLESS YOU REALLY UNDERSTAND WHAT YOU ARE DOING!!!!!
# THIS COULD SERIOUSLY MESS UP YOUR DATABASE IF YOU ALTER THIS INCORRECTLY
# JUST A FRIENDLY REMINDER ;)
# put in the cluster for back in the day before accounting was made here for us we are using 'Tue Jan 1 00:00:00 2008' = 1199174400 as the start
-puts $file "insert into cluster_event_table (node_name, cluster, cpu_count, period_start, period_end, reason) values"
-puts $file "('', '$cluster', $cluster_cpus, $period_start, $period_end, 'Cluster processor count')"
+
+puts $file "insert into cluster_event_table (node_name, cluster, cpu_count, period_start, period_end, reason, cluster_nodes) values"
+puts $file "('', '$cluster', $cluster_cpus, $period_start, $period_end, 'Cluster processor count', '$node_list' )"
#put a node down for 30 minutes starting at 45 minutes after the start to make sure our rollups work so we should get 15 minutes on one hour and 15 on the other
-puts $file ", ('$node0', '$cluster', $node0_cpus, $node0_down_start, $node0_down_end, 'down')"
+puts $file ", ('$node0', '$cluster', $node0_cpus, $node0_down_start, $node0_down_end, 'down','')"
#puts $file ", ('$node1', '$cluster', $node1_cpus, $period_start, $period_end, 'down')"
puts $file "on duplicate key update period_start=VALUES(period_start), period_end=VALUES(period_end);"
@@ -326,6 +341,7 @@
exit $exit_code
}
+
#
# Use sacct to see if the job loaded
#
@@ -333,7 +349,7 @@
set my_pid [eval spawn $sacct -p -C $cluster --fields=cluster,account,associd,wckey,wckeyid,start,end,elapsed --noheader --start=$start_str --end=$end_str]
expect {
-re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
incr exit_code 1
}
-re "$cluster.$account1.$user1acct1.$wckey1.$user1wckey1.$job1_start_str.$job1_end_str.$job1_diff_str." {
@@ -375,7 +391,6 @@
exit $exit_code
}
-
#
# Use sacctmgr to roll up that time period
#
@@ -383,7 +398,7 @@
set my_pid [eval spawn $sacctmgr -i roll $start_str $end_str]
expect {
-re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
+ send_user "FAILURE: there was a problem with the sacctmgr command\n"
incr exit_code 1
}
-re "$cluster" {
@@ -417,1063 +432,16 @@
exit $exit_code
}
-# Use sreport to get cluster usage for the first hour
#
-
-# Mon Dec 31 23:00:00 2007
-set period_start 1199170800
-
-# Tue Jan 1 00:00:00 2008
-set period_end 1199174400
-set start_str [timestamp -format %Y-%m-%dT%X -seconds $period_start]
-set end_str [timestamp -format %Y-%m-%dT%X -seconds $period_end]
-
-set reported [expr ($period_end - $period_start) * $cluster_cpus]
-set down [expr ($period_end-$node0_down_start) * $node0_cpus]
-set alloc_sec [expr ($period_end-$job1_start) * $node1_cpus]
-set wckey_alloc_sec $alloc_sec
-set resv 0
-set idle [expr $reported - ($down + $alloc_sec + $resv)]
-
-set down [format "%d\\\(%.2f%%\\\)" $down [expr double($down * 100)/$reported]]
-set alloc [format "%d\\\(%.2f%%\\\)" $alloc_sec [expr double($alloc_sec * 100)/$reported]]
-set resv [format "%d\\\(%.2f%%\\\)" $resv [expr double($resv * 100)/$reported]]
-set idle [format "%d\\\(%.2f%%\\\)" $idle [expr double($idle * 100)/$reported]]
-set reported [format "%d\\\(%.2f%%\\\)" $reported 100]
-
-send_user "\nTesting sreport for first hour\n"
-set matches 0
-set my_pid [eval spawn $sreport cluster utilization cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,idle,down,alloc,res,reported]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$idle.$down.$alloc.$resv.$reported." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the UserUtilizationByAccount report
-set matches 0
-set my_pid [eval spawn $sreport cluster UserUtilizationByAccount cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,account,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$user1.$account1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 2.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the AccountUtilizationByUser report
-set matches 0
-set my_pid [eval spawn $sreport cluster AccountUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.root..$alloc." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account1..$alloc." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account1.$user1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 3} {
- send_user "\nFAILURE: sreport didn't give good info 3.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the UserUtilizationByWcKey report
-set matches 0
-set my_pid [eval spawn $sreport cluster UserUtilizationByWckey cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,wckey,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$user1.$wckey1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 4.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the WckeyUtilizationByUser report
-set matches 0
-set my_pid [eval spawn $sreport cluster WckeyUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,wckey,login,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$wckey1..$alloc." {
- incr matches
- exp_continue
- }
- -re "$cluster.$wckey1.$user1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sreport not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 2} {
- send_user "\nFAILURE: sreport didn't give good info 5.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the User Top report
-set matches 0
-set my_pid [eval spawn $sreport user top cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$account1.$user1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 6.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the Job Size report
-set matches 0
-set my_pid [eval spawn $sreport job size grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$account1.0.$alloc_sec.0." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 7 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the Job Size by wckey report
-set matches 0
-set my_pid [eval spawn $sreport job sizesbywckey grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$wckey1.0.$wckey_alloc_sec.0." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 8 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# Use sreport to get cluster usage for the second hour
+# Execute sub-test
#
+inc22_1_1
-# Tue Jan 1 00:00:00 2008
-set period_start 1199174400
+inc22_1_2
-# Tue Jan 1 01:00:00 2008
-set period_end 1199178000
-set start_str [timestamp -format %Y-%m-%dT%X -seconds $period_start]
-set end_str [timestamp -format %Y-%m-%dT%X -seconds $period_end]
+inc22_1_3
-set reported [expr ($period_end - $period_start) * $cluster_cpus]
-set down [expr ($node0_down_end-$period_start) * $node0_cpus]
-set alloc_sec [expr ($job1_end-$period_start) * $job1_cpus]
-set wckey_alloc_sec $alloc_sec
-set resv [expr ($period_end - $job2_elig) * $job2_cpus]
-set idle [expr $reported - ($down + $alloc_sec + $resv)]
-# do the same logic inside the plugin to figure out the correct
-# idle time and resv time
-if {$idle < 0 } {
- set resv [expr $resv + $idle]
- set idle 0
- if {$resv < 0} {
- set resv 0
- }
-}
-set down [format "%d\\\(%.2f%%\\\)" $down [expr double($down * 100)/$reported]]
-set alloc [format "%d\\\(%.2f%%\\\)" $alloc_sec [expr double($alloc_sec * 100)/$reported]]
-set resv [format "%d\\\(%.2f%%\\\)" $resv [expr double($resv * 100)/$reported]]
-set idle [format "%d\\\(%.2f%%\\\)" $idle [expr double($idle * 100)/$reported]]
-set reported [format "%d\\\(%.2f%%\\\)" $reported 100]
-
-send_user "\nTesting sreport for second hour\n"
-set matches 0
-set my_pid [eval spawn $sreport cluster utilization cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,idle,down,alloc,res,reported]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$idle.$down.$alloc.$resv.$reported." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 9 $matches.\n"
- send_user "we are looking for $cluster.$idle.$down.$alloc.$resv.$reported.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the UserUtilizationByAccount report
-set matches 0
-set my_pid [eval spawn $sreport cluster UserUtilizationByAccount cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,account,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$user1.$account1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 10.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the AccountUtilizationByUser report
-set matches 0
-set my_pid [eval spawn $sreport cluster AccountUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.root..$alloc." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account1..$alloc." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account1.$user1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 3} {
- send_user "\nFAILURE: sreport didn't give good info 11.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the UserUtilizationByWckey report
-set matches 0
-set my_pid [eval spawn $sreport cluster UserUtilizationByWckey cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,wckey,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$user1.$wckey1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 12.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the WckeyUtilizationByUser report
-set matches 0
-set my_pid [eval spawn $sreport cluster WckeyUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,wckey,login,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$wckey1..$alloc." {
- incr matches
- exp_continue
- }
- -re "$cluster.$wckey1.$user1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 2} {
- send_user "\nFAILURE: sreport didn't give good info 13.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the User Top report
-set matches 0
-set my_pid [eval spawn $sreport user top cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$account1.$user1.$alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 14.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the Job Size report
-set matches 0
-set my_pid [eval spawn $sreport job size grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$account1.0.$alloc_sec.0." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 15 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the Job Size by wckey report
-set matches 0
-set my_pid [eval spawn $sreport job sizesbywckey grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$wckey1.0.$wckey_alloc_sec.0." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 16 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# Use sreport to get cluster usage for the first 3 days
-#
-
-# Mon Dec 31 23:00:00 2007
-set period_start 1199170800
-
-# Tue Jan 3 00:00:00 2008
-set period_end 1199347200
-set start_str [timestamp -format %Y-%m-%dT%X -seconds $period_start]
-set end_str [timestamp -format %Y-%m-%dT%X -seconds $period_end]
-
-set reported [expr ($period_end - $period_start) * $cluster_cpus]
-set down [expr ($node0_down_end - $node0_down_start) * $node0_cpus]
-set alloc_sec [expr ($job1_end-$job1_start) * $job1_cpus]
-set alloc_sec [expr $alloc_sec + (($job2_end-$job2_start) * $job2_cpus)]
-set alloc_sec [expr $alloc_sec + (($job3_end-$job3_start) * $job3_cpus)]
-set wckey_alloc_sec1 [expr $job1_alloc + $job3_alloc]
-set wckey_alloc_sec2 $job2_alloc
-set resv [expr ($job2_start - $job2_elig) * $job2_cpus]
-set resv [expr $resv + (($job3_start - $job3_elig) * $job3_cpus)]
-# I didn't have time to do the correct math here so I am just putting in 9000 which should be the correct value of over commit
-set over 9000
-set resv [expr $resv - $over]
-set idle [expr $reported - ($down + $alloc_sec + $resv)]
-# do the same logic inside the plugin to figure out the correct
-# idle time and resv time
-if {$idle < 0 } {
- set resv [expr $resv + $idle]
- set idle 0
- if {$resv < 0} {
- set resv 0
- }
-}
-set down [format "%d\\\(%.2f%%\\\)" $down [expr double($down * 100)/$reported]]
-set alloc [format "%d\\\(%.2f%%\\\)" $alloc_sec [expr double($alloc_sec * 100)/$reported]]
-set resv [format "%d\\\(%.2f%%\\\)" $resv [expr double($resv * 100)/$reported]]
-set idle [format "%d\\\(%.2f%%\\\)" $idle [expr double($idle * 100)/$reported]]
-set over [format "%d\\\(%.2f%%\\\)" $over [expr double($over * 100)/$reported]]
-
-set job1_alloc_str [format "%d\\\(%.2f%%\\\)" $job1_alloc [expr double($job1_alloc * 100)/$reported]]
-set job2_alloc_str [format "%d\\\(%.2f%%\\\)" $job2_alloc [expr double($job2_alloc * 100)/$reported]]
-set job3_alloc_str [format "%d\\\(%.2f%%\\\)" $job3_alloc [expr double($job3_alloc * 100)/$reported]]
-set total_alloc_str [format "%d\\\(%.2f%%\\\)" $total_alloc [expr double($total_alloc * 100)/$reported]]
-set acct1_alloc_str [format "%d\\\(%.2f%%\\\)" $acct1_alloc [expr double($acct1_alloc * 100)/$reported]]
-set acct2_alloc_str [format "%d\\\(%.2f%%\\\)" $acct2_alloc [expr double($acct2_alloc * 100)/$reported]]
-set acct3_alloc_str [format "%d\\\(%.2f%%\\\)" $acct3_alloc [expr double($acct3_alloc * 100)/$reported]]
-set wckey1_alloc_str [format "%d\\\(%.2f%%\\\)" $wckey1_alloc [expr double($wckey1_alloc * 100)/$reported]]
-set user1_wckey1_alloc_str [format "%d\\\(%.2f%%\\\)" $user1_wckey1_alloc [expr double($user1_wckey1_alloc * 100)/$reported]]
-set user2_wckey1_alloc_str [format "%d\\\(%.2f%%\\\)" $user2_wckey1_alloc [expr double($user2_wckey1_alloc * 100)/$reported]]
-
-set reported [format "%d\\\(%.2f%%\\\)" $reported 100]
-
-
-send_user "\nTesting sreport for 3 days\n"
-set matches 0
-set my_pid [eval spawn $sreport cluster utilization cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,idle,down,alloc,res,over,reported]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$idle.$down.$alloc.$resv.$over.$reported." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 17 $matches.\n"
- send_user "we are looking for $cluster.$idle.$down.$alloc.$resv.$over.$reported.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the UserUtilizationByAccount report
-set matches 0
-set my_pid [eval spawn $sreport cluster UserUtilizationByAccount cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,account,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$user2.$account3.$job2_alloc_str." {
- send_user "got 2\n"
- incr matches
- exp_continue
- }
- -re "$cluster.$user1.$account1.$job1_alloc_str." {
- send_user "got 1\n"
- incr matches
- exp_continue
- }
- -re "$cluster.$user1.$account2.$job3_alloc_str." {
- send_user "got 3\n"
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 3} {
- send_user "\nFAILURE: sreport didn't give good info 18 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the AccountUtilizationByUser report
-set matches 0
-set my_pid [eval spawn $sreport cluster AccountUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.root..$total_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account1..$acct1_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account1.$user1.$job1_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account2..$acct2_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account2.$user1.$job3_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account3..$acct3_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account3.$user2.$job2_alloc_str." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 7} {
- send_user "\nFAILURE: sreport didn't give good info 19 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the UserUtilizationByWckey report
-set matches 0
-set my_pid [eval spawn $sreport cluster UserUtilizationByWckey cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,login,wckey,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$user2.$wckey1.$user2_wckey1_alloc_str." {
- send_user "got 2\n"
- incr matches
- exp_continue
- }
- -re "$cluster.$user1.$wckey1.$user1_wckey1_alloc_str." {
- send_user "got 1\n"
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 2} {
- send_user "\nFAILURE: sreport didn't give good info 20 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the WckeyUtilizationByUser report
-set matches 0
-set my_pid [eval spawn $sreport cluster WckeyUtilizationByUser cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,wckey,login,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$wckey1..$wckey1_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$wckey1.$user1.$user1_wckey1_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$wckey1.$user2.$user2_wckey1_alloc_str." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 3} {
- send_user "\nFAILURE: sreport didn't give good info 21 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the User Top report
-set matches 0
-set my_pid [eval spawn $sreport user top cluster='$cluster' start=$start_str end=$end_str -tsecper -p -n format=cluster,account,login,used]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$account3.$user2.$job2_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account1.$user1.$job1_alloc_str." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account2.$user1.$job3_alloc_str." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 3} {
- send_user "\nFAILURE: sreport didn't give good info 22 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the Job Size report
-set matches 0
-set my_pid [eval spawn $sreport job size grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$account1.0.$job1_alloc.0." {
- incr matches
- exp_continue
- }
- -re "$cluster.$account2.0.$job3_alloc.$job2_alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 2} {
- send_user "\nFAILURE: sreport didn't give good info 23 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the Job Size report down a level
-set matches 0
-set my_pid [eval spawn $sreport job size grouping=2,4 cluster='$cluster' account='$account2' start=$start_str end=$end_str -tsec -p -n]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$account3.0.0.$job2_alloc." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
-
-if {$matches != 1} {
- send_user "\nFAILURE: sreport didn't give good info 24 $matches.\n"
- incr exit_code 1
-}
-
-if { $exit_code } {
- incr exit_code [archive_load $sql_rem]
- remove_user "" "" $users
- remove_acct "" $accounts
- remove_cluster "$cluster"
- exit $exit_code
-}
-
-# test the Job Size by wckey report
-set matches 0
-set my_pid [eval spawn $sreport job sizesbywckey grouping=2,4 cluster='$cluster' start=$start_str end=$end_str -tsec -p -n]
-expect {
- -re "There was a problem" {
- send_user "FAILURE: there was a problem with the sacctmgr command\n"
- incr exit_code 1
- }
- -re "$cluster.$wckey1.0.$wckey_alloc_sec1.$wckey_alloc_sec2." {
- incr matches
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sacctmgr archive load not responding\n"
- slow_kill $my_pid
- incr exit_code 1
- }
- eof {
- wait
- }
-}
+inc22_1_4
if {$matches != 1} {
send_user "\nFAILURE: sreport didn't give good info 25 $matches.\n"
@@ -1496,7 +464,7 @@
exec $bin_rm -f $sql_in
exec $bin_rm -f $sql_rem
- if {$exit_code == 0} {
+ if {$exit_code == 0} {
send_user "\nSUCCESS\n"
} else {
send_user "\nFAILURE\n"
@@ -1504,4 +472,5 @@
} else {
send_user "\nFAILURE\n"
}
+
exit $exit_code
diff --git a/testsuite/expect/test23.2 b/testsuite/expect/test23.2
index bdd94c9..aa73aee 100755
--- a/testsuite/expect/test23.2
+++ b/testsuite/expect/test23.2
@@ -68,6 +68,11 @@
exit 0
}
+if { [test_front_end] == 1 } {
+ send_user "\nWARNING: This test can't be run on a front-end system\n"
+ exit 0
+}
+
if { [string compare [check_accounting_admin_level] "Administrator"] } {
send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME set admin=admin.\n"
exit 0
@@ -189,7 +194,7 @@
# Spawn a job via srun using this account
#
set job_id3 0
-spawn $sbatch -N1 -t1 -v --account=$test_acct $file_in1
+spawn $sbatch -N1 -t1 -v --account=$test_acct --output=/dev/null $file_in1
expect {
-re "Submitted batch job ($number)" {
set job_id3 $expect_out(1,string)
@@ -639,7 +644,6 @@
set exit_code 1
}
-
#
# Use sacctmgr to delete the test account (shouldn't work, because of running jobs)
#
@@ -651,7 +655,7 @@
incr damatches
exp_continue
}
- "Job(s) running, cancel job(s) before remove" {
+ "Job(s) active, cancel job(s) before remove" {
send_user "This error was expected, no worries\n"
set needtodel 1
}
@@ -677,7 +681,11 @@
}
}
- sleep 1
+ if {[wait_for_job $job_id3 DONE] != 0} {
+ send_user "\nFAILURE: error completing job $job_id3\n"
+ cancel_job $job_id3
+ set exit_code 1
+ }
#
# Use sacctmgr to delete the test account
diff --git a/testsuite/expect/test25.1 b/testsuite/expect/test25.1
index 8dff691..b823f9e 100755
--- a/testsuite/expect/test25.1
+++ b/testsuite/expect/test25.1
@@ -65,7 +65,7 @@
set cpu_cnt 1
spawn $sinfo -h -o "%P %C" -p $partition --state=idle
expect {
- -re "$partition ($number)(K?)/($number)(K?)" {
+ -re "$partition\\* ($number)(K?)/($number)(K?)" {
set cpu_cnt $expect_out(3,string)
if {[string compare $expect_out(4,string) ""]} {
set cpu_cnt [expr $cpu_cnt * 1024]
diff --git a/testsuite/expect/test27.1 b/testsuite/expect/test27.1
new file mode 100755
index 0000000..b0614eb
--- /dev/null
+++ b/testsuite/expect/test27.1
@@ -0,0 +1,65 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Test sdiag --usage option. (initially same as --help)
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2012 SchedMD LLC
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "27.1"
+set exit_code 0
+set matches 0
+
+print_header $test_id
+
+#
+# Report the sdiag usage format
+#
+
+spawn $sdiag --usage
+expect {
+ -re "Usage: *sdiag" {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sdiag not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+
+if {$matches != 1} {
+ send_user "\nFAILURE: sdiag --usage failed ($matches)\n"
+ set exit_code 1
+}
+
+if {$exit_code == 0} {
+ send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test27.2 b/testsuite/expect/test27.2
new file mode 100755
index 0000000..c7a4f10
--- /dev/null
+++ b/testsuite/expect/test27.2
@@ -0,0 +1,69 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# sdiag --help
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2012 SchedMD LLC
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "27.2"
+set exit_code 0
+set matches 0
+
+print_header $test_id
+
+#
+# Report the sacctmgr help format
+#
+
+spawn $sdiag --help
+expect {
+ -re "Usage: sdiag" {
+ incr matches
+ exp_continue
+ }
+ -re "Help options:" {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sdiag not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+
+if {$matches != 2} {
+ send_user "\nFAILURE: sdiag --help failed ($matches)\n"
+ set exit_code 1
+}
+
+if {$exit_code == 0} {
+ send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test27.3 b/testsuite/expect/test27.3
new file mode 100755
index 0000000..166ab7b
--- /dev/null
+++ b/testsuite/expect/test27.3
@@ -0,0 +1,65 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Test sdiag -V (display version)
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2012 SchedMD LLC
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "27.3"
+set exit_code 0
+set matches 0
+
+print_header $test_id
+
+#
+# Report the sdiag version format
+#
+
+spawn $sdiag -V
+expect {
+ -re "slurm \[0-9]*.\[0-9]*.\[0-9]*" {
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sdiag not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+
+if {$matches != 1} {
+ send_user "\nFAILURE: sdiag -V failed ($matches)\n"
+ set exit_code 1
+}
+
+if {$exit_code == 0} {
+ send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test27.4 b/testsuite/expect/test27.4
new file mode 100755
index 0000000..552c4c4
--- /dev/null
+++ b/testsuite/expect/test27.4
@@ -0,0 +1,103 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Test sdiag --all (default output)
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2012 SchedMD LLC
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "27.4"
+set exit_code 0
+
+print_header $test_id
+
+proc sdiag_all { } {
+ global sdiag number exit_code
+ global jobs_submitted jobs_started
+
+ set matches 0
+ spawn $sdiag --all
+ expect {
+ -re "Jobs submitted: *($number)" {
+ set jobs_submitted $expect_out(1,string)
+ incr matches
+ exp_continue
+ }
+ -re "Jobs started: *($number)" {
+ set jobs_started $expect_out(1,string)
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sdiag not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 2} {
+ send_user "\nFAILURE: sdiag --all failed ($matches)\n"
+ set exit_code 1
+ }
+}
+
+sdiag_all
+set submit_begin $jobs_submitted
+set start_begin $jobs_started
+
+#
+# Submit a slurm job to increment job counters
+#
+set timeout $max_job_delay
+set srun_pid [spawn $srun -N1 -t1 $bin_id]
+expect { exp_continue
+ timeout {
+ send_user "\nFAILURE: srun not responding\n"
+ slow_kill $srun_pid
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+
+sdiag_all
+
+if {$jobs_submitted <= $submit_begin} {
+ send_user "\nFAILURE: sdiag job submit count not incremented\n"
+ set exit_code 1
+}
+if {$jobs_started <= $start_begin} {
+ send_user "\nFAILURE: sdiag job start count not incremented\n"
+ set exit_code 1
+}
+
+if {$exit_code == 0} {
+ send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test27.5 b/testsuite/expect/test27.5
new file mode 100755
index 0000000..0b8091e
--- /dev/null
+++ b/testsuite/expect/test27.5
@@ -0,0 +1,101 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Test sdiag --reset
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2012 SchedMD LLC
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "27.5"
+set exit_code 0
+
+print_header $test_id
+
+proc sdiag_all { } {
+ global sdiag number exit_code
+ global jobs_submitted jobs_started
+
+ set matches 0
+ spawn $sdiag --all
+ expect {
+ -re "Jobs submitted: *($number)" {
+ set jobs_submitted $expect_out(1,string)
+ incr matches
+ exp_continue
+ }
+ -re "Jobs started: *($number)" {
+ set jobs_started $expect_out(1,string)
+ incr matches
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: sdiag not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+ }
+
+ if {$matches != 2} {
+ send_user "\nFAILURE: sdiag --all failed ($matches)\n"
+ set exit_code 1
+ }
+}
+
+sdiag_all
+
+set perm_denied 0
+spawn $sdiag --reset
+expect {
+ -re "permission denied" {
+ set perm_denied 1
+ exp_continue
+ }
+ eof {
+ wait
+ }
+}
+if {$perm_denied != 0} {
+ send_user "\nInsufficient permission for this test\n"
+ exit $exit_code
+}
+
+sdiag_all
+
+if {$jobs_submitted != 0} {
+ send_user "\nFAILURE: sdiag job submit count not cleared\n"
+ set exit_code 1
+}
+if {$jobs_started != 0} {
+ send_user "\nFAILURE: sdiag job start count not cleared\n"
+ set exit_code 1
+}
+
+if {$exit_code == 0} {
+ send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test3.11 b/testsuite/expect/test3.11
index b5cdadc..53786ad 100755
--- a/testsuite/expect/test3.11
+++ b/testsuite/expect/test3.11
@@ -32,6 +32,14 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
+source ./inc3.11.1
+source ./inc3.11.2
+source ./inc3.11.3
+source ./inc3.11.4
+source ./inc3.11.5
+source ./inc3.11.6
+
+
set test_id "3.11"
set file_in "test$test_id.input"
@@ -107,8 +115,8 @@
send_user "$res_params\n"
}
set ret_code 1
- exp_continue
}
+ exp_continue
}
-re "No reservations in the system" {
if {!$failure_expected} {
@@ -128,6 +136,13 @@
set ret_code 1
exp_continue
}
+ timeout {
+ send_user "\nFAILURE: scontrol not responding\n"
+ set ret_code 1
+ }
+ eof {
+ wait
+ }
}
#exp_internal 0
@@ -214,8 +229,6 @@
}
-
-
#
# Identify usable nodes in default partition
#
@@ -263,343 +276,22 @@
}
-#
-# TEST 1
-# Make a list of lists with a series of parameters to test. All the tests
-# in goodtests should pass, all those in badtests should fail.
-#
-set badtests "
- {}
- {Duration=5 Nodes=$def_node User=$user_name Flags=ignore_jobs}
- {StartTime=now Nodes=$def_node User=$user_name Flags=ignore_jobs}
- {StartTime=midnight Duration=600 User=$user_name Flags=ignore_jobs}
- {StartTime=now Duration=5 Nodes=ALL Flags=ignore_jobs}
- {StartTime=now Duration=5 NodeCnt= Nodes= User=$user_name Flags=ignore_jobs}
- {StartTime=now Duration=5 User=$user_name Flags=ignore_jobs}
- {StartTime=blah Duration=5 Nodes=$def_node User=$user_name Flags=ignore_jobs}
- {StartTime=now Duration=foo Nodes=$def_node User=$user_name Flags=ignore_jobs}
- {StartTime=now Duration=5 Nodes=$def_node User=$user_name PartitionName=badpartname Flags=ignore_jobs}
- {StartTime=now Duration=5 Nodes=$def_node User=$user_name Flags=badtype,ignore_jobs}
- {StartTime=now+10minutes EndTime=now Nodes=$def_node User=$user_name Flags=ignore_jobs}
- {StartTime=now Duration=5 Nodes=$def_node User=$user_name Licenses=DUMMY_FOR_TESTING Flags=ignore_jobs}
-"
-# {StartTime=now Duration=5 Nodes=$def_node Account=badaccountname}
+# Start Test 1
+inc3_11_1
-foreach test $badtests {
- set ret_code [create_res $test 1]
- if {$ret_code == 0} {
- send_user "\nFAILURE: Reservation $test did not fail but should have\n"
- delete_res $res_name
- exit 1
- } else {
- send_user "Expected error. You can turn that frown upside-down.\n"
- }
-}
+# Start Test 2
+inc3_11_2
-if {[test_super_user] == 0} {
- send_user "\nWARNING: can not test more unless SlurmUser or root\n"
- exit $exit_code
-}
+# Start Test 3
+inc3_11_3
-set goodtests "
- {StartTime=now Duration=5 Nodes=$def_node User=$user_name Flags=ignore_jobs}
- {StartTime=now+5minutes EndTime=now+10minutes Nodes=$def_node User=$user_name Flags=ignore_jobs}
- {StartTime=midnight Duration=600 Nodes=$def_node User=$user_name Flags=ignore_jobs}
- {StartTime=now Duration=5 Nodes=ALL User=$user_name Flags=ignore_jobs}
- {StartTime=now Duration=5 NodeCnt=1 User=$user_name Flags=ignore_jobs}
- {StartTime=now Duration=5 Nodes=$def_node User=$user_name PartitionName=$def_partition Flags=ignore_jobs}
- {StartTime=now Duration=5 Nodes=$def_node User=$user_name Flags=Maint Flags=ignore_jobs}
-"
-foreach test $goodtests {
- set ret_code [create_res $test 0]
- if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to create a valid reservation\n"
- exit $ret_code
- }
- set ret_code [delete_res $res_name]
- if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to delete a reservation\n"
- exit $ret_code
- }
-}
+# Start Test 4
+inc3_11_4
-spawn $scontrol show res
-#
-# TEST 2
-# Create a reservation and update it in various ways
-#
-set ret_code [create_res "StartTime=now+60minutes Duration=60 NodeCnt=1 User=$user_name" 0]
-if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to create a valid reservation\n"
- exit $ret_code
-}
+# Start Test 5
+inc3_11_5
-set goodupdates "
- {PartitionName=$def_partition}
- {PartitionName=}
- {Duration=90}
- {StartTime=now+30minutes}
- {Nodes=$def_node}
- {EndTime=now+60minutes Flags=Maint NodeCnt=1 Nodes=}
-"
-# {Flags=Maint}
-# {Flags=}
+# Start Test 6
+inc3_11_6
-if {$user_name != "root"} {
- lappend goodupdates {Users+=root} {Users-=root}
-}
-
-foreach test $goodupdates {
- set ret_code [update_res $res_name $test 0]
- if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to create a valid reservation\n"
- set exit_code 1
- break
- }
-
-}
-
-set ret_code [delete_res $res_name]
-if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to delete a reservation\n"
- exit $ret_code
-}
-
-
-#
-# TEST 3
-# Make a reservation, submit a job to it, confirm that the job is paired
-# with the reservation.
-#
-
-# Make the reservation
-set ret_code [create_res "StartTime=now+60minutes Duration=60 NodeCnt=1 User=$user_name" 0]
-if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to create a valid reservation\n"
- exit $ret_code
-}
-
-# Make the job script
-exec $bin_rm -f $file_in
-make_bash_script $file_in "$bin_sleep 10"
-
-# Submit the batch job
-set sbatch_pid [spawn $sbatch -N1 --reservation=$res_name $file_in]
-expect {
- -re "Submitted batch job ($number)" {
- set job_id $expect_out(1,string)
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sbatch not responding\n"
- slow_kill $sbatch_pid
- set exit_code 1
- }
- eof {
- wait
- }
-}
-if {$job_id == 0} {
- send_user "\nFAILURE: batch submit failure\n"
- exit 1
-}
-
-# Show the job, make sure reservation tag is right
-spawn $scontrol show job $job_id
-expect {
- -re "Reservation=($alpha_numeric_under)" {
- set tmp_res_name $expect_out(1,string)
- if {$tmp_res_name != $res_name} {
- send_user "\nFAILURE: problem submitting a job to a "
- send_user "reservation. Job $job_id is running on "
- send_user "reservation $tmp_res_name, not $res_name\n"
- set exit_code 1
- exp_continue
- }
- }
- -re "Invalid job id specified" {
- send_user "\nFAILURE: Job $job_id not found\n"
- set exit_code 1
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: scontrol not responding\n"
- set exit_code 1
- }
- eof {
- wait
- }
-}
-
-# Cancel the job
-spawn $scancel -v $job_id
-expect {
- -re "Invalid job_id" {
- send_user "\nFAILURE: Error cancelling the job submitted "
- send_user "to the reservation. Job $job_id not found\n"
- set exit_code 1
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: scancel not responding\n"
- set exit_code 1
- }
- eof {
- wait
- }
-}
-
-# Delete the reservation
-set ret_code [delete_res $res_name]
-if {$ret_code != 0} {
- exit $ret_code
-}
-
-
-#
-# TEST 4
-# If not running as root, make a reservation restricted to root, submit a job,
-# and confirm that the job is rejected.
-#
-if {$user_name != "root"} {
- # Make the reservation
- set ret_code [create_res "StartTime=now+60minutes Duration=60 NodeCnt=1 User=root" 0]
- if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to create a reservation\n"
- set exit_code 1
- }
-
- # Submit the batch job
- set denied 0
- set sbatch_pid [spawn $sbatch -N1 --reservation=$res_name $file_in]
- expect {
- -re "Submitted batch job ($number)" {
- set job_id $expect_out(1,string)
- exec $scancel $job_id
- send_user "\nFAILURE: job submit should have been denied\n"
- set exit_code 1
- exp_continue
- }
- -re "Batch job submission failed: Access denied to requested reservation" {
- # Job was correctly denied
- set denied 1
- exp_continue
- }
- timeout {
- send_user "\nFAILURE: sbatch not responding\n"
- slow_kill $sbatch_pid
- set exit_code 1
- }
- eof {
- wait
- }
- }
-
- if {$denied == 0} {
- send_user "\nFAILURE: Job $job_id should have been rejected "
- send_user "from reservation restricted to root. Expected "
- send_user "rejection message not given.\n"
- set exit_code 1
- } else {
- send_user "Expected error, no worries mate.\n"
- }
- # Delete the reservation
- set ret_code [delete_res $res_name]
- if {$ret_code != 0} {
- exit $ret_code
- }
-}
-
-#
-# TEST 5
-# Test node reservation conflict
-#
-
-# Make the reservation
-set ret_code [create_res "StartTime=now+60minutes Duration=60 Nodes=ALL user=$user_name" 0]
-if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to create a valid reservation\n"
- exit $ret_code
-}
-
-set res_name_save $res_name
-
-# Test for node reservation conflict
-set ret_code [create_res "StartTime=now+60minutes Duration=60 NodeCnt=1 user=$user_name" 1]
-if {$ret_code == 0} {
- send_user "\nFAILURE: Reservation $test did not fail but should have\n"
- delete_res $res_name
- exit 1
-} else {
- send_user "Expected error. You can turn that frown upside-down.\n"
-}
-
-# Delete the reservation
-set ret_code [delete_res $res_name_save]
-if {$ret_code != 0} {
- exit $ret_code
-}
-
-#
-# TEST 6
-# Test time reservation conflict
-#
-
-# Make the reservation
-set ret_code [create_res "StartTime=now+60minutes Duration=60 Nodes=ALL user=$user_name" 0]
-if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to create a valid reservation\n"
- exit $ret_code
-}
-
-set res_name_save $res_name
-
-# Test for time reservation conflict (front overlap)
-set ret_code [create_res "StartTime=now+30minutes Duration=60 Nodes=ALL user=$user_name" 1]
-if {$ret_code == 0} {
- send_user "\nFAILURE: Reservation $test did not fail but should have\n"
- delete_res $res_name
- exit 1
-} else {
- send_user "Expected error. You can turn that frown upside-down.\n"
-}
-
-# Delete the reservation
-set ret_code [delete_res $res_name_save]
-if {$ret_code != 0} {
- exit $ret_code
-}
-
-
-# Make the reservation
-set ret_code [create_res "StartTime=now+30minutes Duration=60 Nodes=ALL user=$user_name" 0]
-if {$ret_code != 0} {
- send_user "\nFAILURE: Unable to create a valid reservation\n"
- exit $ret_code
-}
-
-set res_name_save $res_name
-
-# Test for time reservation conflict (trail overlap)
-set ret_code [create_res "StartTime=now+60minutes Duration=60 Nodes=ALL user=$user_name" 1]
-if {$ret_code == 0} {
- send_user "\nFAILURE: Reservation $test did not fail but should have\n"
- delete_res $res_name
- exit 1
-} else {
- send_user "Expected error. You can turn that frown upside-down.\n"
-}
-# Delete the reservation
-set ret_code [delete_res $res_name_save]
-if {$ret_code != 0} {
- exit $ret_code
-}
-
-#
-# Remove the temporary job script
-exec $bin_rm -f $file_in
-
-if {$exit_code == 0} {
- send_user "\nSUCCESS\n"
-}
exit $exit_code
diff --git a/testsuite/expect/test3.3 b/testsuite/expect/test3.3
index 65801ff..a25f129 100755
--- a/testsuite/expect/test3.3
+++ b/testsuite/expect/test3.3
@@ -142,6 +142,10 @@
set read_state 1
exp_continue
}
+ -re "State=($alpha_cap).CLOUD.DRAIN" {
+ set read_state 1
+ exp_continue
+ }
-re "Reason=($alpha_cap)" {
set reason_code $expect_out(1,string)
exp_continue
@@ -203,6 +207,10 @@
set read_state 1
exp_continue
}
+ -re "State=($alpha_cap).CLOUD.DRAIN" {
+ set read_state 1
+ exp_continue
+ }
timeout {
send_user "\nFAILURE: scontrol not responding\n"
set exit_code 1
diff --git a/testsuite/expect/test3.7 b/testsuite/expect/test3.7
index f96628e..15890eb 100755
--- a/testsuite/expect/test3.7
+++ b/testsuite/expect/test3.7
@@ -158,7 +158,7 @@
set host_name $expect_out(1,string)
exp_continue
}
- -re "BP_List=($alpha_numeric_under)" {
+ -re "MidplaneList=($alpha_numeric_under)" {
set host_name $expect_out(1,string)
exp_continue
}
@@ -208,6 +208,7 @@
sleep 5
suspend_job $job_id1 suspend
if {$not_supported == 1} {
+ exec $bin_rm -f $file_out1 $file_out2 $file_prog $file_prog_sh1 $file_prog_sh2
cancel_job $job_id1
exit 0
}
diff --git a/testsuite/expect/test4.11 b/testsuite/expect/test4.11
index 60f6dbc..66d6111 100755
--- a/testsuite/expect/test4.11
+++ b/testsuite/expect/test4.11
@@ -48,7 +48,7 @@
incr matches
exp_continue
}
- -re "REASON *USER *TIMESTAMP *BP_LIST" {
+ -re "REASON *USER *TIMESTAMP *MIDPLANELIST" {
incr matches
exp_continue
}
diff --git a/testsuite/expect/test4.12 b/testsuite/expect/test4.12
index 4adeb19..7832b68 100755
--- a/testsuite/expect/test4.12
+++ b/testsuite/expect/test4.12
@@ -85,7 +85,7 @@
}
proc sinfo_test_1 { node proc_cnt total_procs idle_cpus } {
- global sinfo number prompt
+ global float number prompt sinfo slash
upvar spawn_id spawn_id
set found 0
@@ -99,22 +99,22 @@
}
}
expect {
- -re "($number)(K?).($number)(K?).($number)(K?).($number)(K?) ($number)(K?).($number)(K?) $node" {
+ -re "($float)(K?)($slash)($float)(K?)($slash)($float)(K?)($slash)($float)(K?) ($number)($slash)($number) $node" {
set found 1
set num_alloc $expect_out(1,string)
if {[string compare $expect_out(2,string) ""]} {
set num_alloc [expr $num_alloc * 1024]
}
- set num_idle $expect_out(3,string)
- if {[string compare $expect_out(4,string) ""]} {
+ set num_idle $expect_out(4,string)
+ if {[string compare $expect_out(5,string) ""]} {
set num_idle [expr $num_idle * 1024]
}
- set num_other $expect_out(5,string)
- if {[string compare $expect_out(6,string) ""]} {
+ set num_other $expect_out(7,string)
+ if {[string compare $expect_out(8,string) ""]} {
set num_other [expr $num_other * 1024]
}
- set num_total $expect_out(7,string)
- if {[string compare $expect_out(8,string) ""]} {
+ set num_total $expect_out(10,string)
+ if {[string compare $expect_out(11,string) ""]} {
set num_total [expr $num_total * 1024]
}
@@ -351,6 +351,9 @@
if {[regexp {NodeName=(\w+).*CoresPerSocket=(\d+).*CPUTot=(\d+)(K?).*Sockets=(\d+) State=IDLE ThreadsPerCore=(\d+)} $line frag inode_name inode_cores_per_socket inode_procs units inode_sockets inode_threads_per_core] == 1} {
break
}
+ if {[regexp {NodeName=(\w+).*CoresPerSocket=(\d+).*CPUTot=(\d+)(K?).*Sockets=(\d+) State=IDLE.CLOUD ThreadsPerCore=(\d+)} $line frag inode_name inode_cores_per_socket inode_procs units inode_sockets inode_threads_per_core] == 1} {
+ break
+ }
}
exp_internal 0
if {[string compare $units ""]} {
@@ -390,9 +393,7 @@
}
if {![string compare $type "Q"]} {
- if {$psets >= 32} {
- set smallest 16
- } elseif {$psets >= 16} {
+ if {$psets >= 16} {
set smallest 32
} elseif {$psets >= 8} {
set smallest 64
diff --git a/testsuite/expect/test4.3 b/testsuite/expect/test4.3
index b569388..2896260 100755
--- a/testsuite/expect/test4.3
+++ b/testsuite/expect/test4.3
@@ -71,7 +71,7 @@
incr matches
exp_continue
}
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
incr matches
exp_continue
}
@@ -164,7 +164,7 @@
incr matches
exp_continue
}
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
incr matches
exp_continue
}
@@ -242,7 +242,7 @@
incr matches
exp_continue
}
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
incr matches
exp_continue
}
diff --git a/testsuite/expect/test4.4 b/testsuite/expect/test4.4
index ad52d3c..97d0294 100755
--- a/testsuite/expect/test4.4
+++ b/testsuite/expect/test4.4
@@ -49,7 +49,7 @@
incr matches
exp_continue
}
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
incr matches
exp_continue
}
@@ -96,7 +96,7 @@
incr matches
exp_continue
}
- -re "BP_LIST" {
+ -re "MIDPLANELIST" {
incr matches
exp_continue
}
diff --git a/testsuite/expect/test5.6 b/testsuite/expect/test5.6
index 495acad..fb42d42 100755
--- a/testsuite/expect/test5.6
+++ b/testsuite/expect/test5.6
@@ -1,7 +1,7 @@
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
-# Test squeue filtering (--jobs, --node, --states, --steps and
+# Test squeue filtering (--jobs, --nodelist, --states, --steps and
# --user options).
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
@@ -38,6 +38,8 @@
set file_in "test$test_id.input"
set job_id1 0
set job_id2 0
+set job_name1 "job1"
+set job_name2 "job2"
print_header $test_id
@@ -53,7 +55,7 @@
#
# Submit a couple of job so we have something to look at
#
-set sbatch_pid [spawn $sbatch -N$node_cnt --output=/dev/null --error=/dev/null -t5 $file_in]
+set sbatch_pid [spawn $sbatch -N$node_cnt -J$job_name1 --output=/dev/null --error=/dev/null -t5 $file_in]
expect {
-re "Submitted batch job ($number)" {
set job_id1 $expect_out(1,string)
@@ -73,7 +75,7 @@
exit 1
}
-set sbatch_pid [spawn $sbatch -N$node_cnt --output=/dev/null --error=/dev/null --hold -t5 $file_in]
+set sbatch_pid [spawn $sbatch -N$node_cnt -J$job_name2 --output=/dev/null --error=/dev/null --hold -t5 $file_in]
expect {
-re "Submitted batch job ($number)" {
set job_id2 $expect_out(1,string)
@@ -210,7 +212,7 @@
#
# Check the squeue node filter option
#
-spawn $squeue --format=%u --noheader --user=$this_uid --node=dummy_name
+spawn $squeue --format=%u --noheader --user=$this_uid --nodelist=dummy_name
expect {
-re "($alpha_numeric_under)" {
send_user "\nFAILURE: squeue node filter failure\n"
@@ -242,7 +244,7 @@
}
}
if {$node_name_set == 1} {
- spawn $squeue --format=%u --noheader --user=$this_uid --node=$node_name
+ spawn $squeue --format=%u --noheader --user=$this_uid --nodelist=$node_name
expect {
-re "($alpha_numeric_under)" {
set node_name_set 0
@@ -341,11 +343,69 @@
set exit_code 1
}
+#
+# Filters the name associated with the job
+#
+set job_found 0
+spawn $squeue --format=%i --name $job_name1
+expect {
+ -re "($number)" {
+ set tmp_id $expect_out(1,string)
+ if {$tmp_id == $job_id1} {
+ set job_found 1
+ } else {
+ send_user "\nFAILURE: squeue reported invalid job_id ($tmp_id != $job_name1)\n"
+ set exit_code 1
+ }
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: squeue not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+if {$job_found == 0} {
+ send_user "\nFAILURE: squeue failed to locate desired job\n"
+ set exit_code 1
+}
+
+set job_found 0
+spawn $squeue --format=%i --name $job_name2
+expect {
+ -re "($number)" {
+ set tmp_id $expect_out(1,string)
+ if {$tmp_id == $job_id2} {
+ set job_found 1
+ } else {
+ send_user "\nFAILURE: squeue reported invalid job_id ($tmp_id != $job_name2)\n"
+ set exit_code 1
+ }
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: squeue not responding\n"
+ set exit_code 1
+ }
+ eof {
+ wait
+ }
+}
+if {$job_found == 0} {
+ send_user "\nFAILURE: squeue failed to locate desired job\n"
+ set exit_code 1
+}
+
+#
+# Cancel jobs and remove files
+#
cancel_job $job_id2
cancel_job $job_id1
-exec $bin_rm -f $file_in
if {$exit_code == 0} {
send_user "\nSUCCESS\n"
+ exec $bin_rm -f $file_in
}
exit $exit_code
diff --git a/testsuite/expect/test7.15 b/testsuite/expect/test7.15
index a8d56de..99ca7e2 100755
--- a/testsuite/expect/test7.15
+++ b/testsuite/expect/test7.15
@@ -47,13 +47,17 @@
# is intermittent.
#
if { $partition == "" } {
- if {[info exists env(SLURM_PARTITION)] } {
- set partition $env(SLURM_PARTITION)
- } else {
- set partition [default_partition]
- }
+ if {[info exists env(SLURM_PARTITION)] } {
+ set partition $env(SLURM_PARTITION)
+ } else {
+ set partition [default_partition]
+ }
}
-set nnodes [available_nodes $partition]
+if {[test_serial]} {
+ set nnodes 1
+} else {
+ set nnodes [available_nodes $partition]
+}
#
# Run the test_prog to ensure that no signals are blocked by
diff --git a/testsuite/expect/test7.3.prog.c b/testsuite/expect/test7.3.prog.c
index 1cdd992..b7352ee 100644
--- a/testsuite/expect/test7.3.prog.c
+++ b/testsuite/expect/test7.3.prog.c
@@ -67,6 +67,9 @@
if (max_nodes < min_nodes)
max_nodes = min_nodes;
+ /* We do this just to change the MPI type at the start */
+ slurm_mpi_plugin_init("none"); /* Don't try to use PMI */
+
/* Create a job allocation */
slurm_init_job_desc_msg( &job_req );
job_req.min_nodes = min_nodes;
@@ -155,6 +158,7 @@
launch->argc = 1;
launch->user_managed_io = true; /* This is the key to using
"user managed" IO */
+ launch->mpi_plugin_name = "none"; /* Don't try to use PMI */
if (slurm_step_launch(ctx, launch, NULL) != SLURM_SUCCESS) {
slurm_perror("slurm_step_launch");
diff --git a/testsuite/expect/test7.5 b/testsuite/expect/test7.5
index e3fd359..6e0dde4 100755
--- a/testsuite/expect/test7.5
+++ b/testsuite/expect/test7.5
@@ -156,11 +156,8 @@
# sleep to make sure the process is actually running
exec $bin_sleep 1
exec $bin_kill -INT $srun_pid
- exec $bin_usleep 1000
exec $bin_kill -INT $srun_pid
- exec $bin_usleep 1000
- exec $bin_kill -INT $srun_pid
- send_user "\nSent SIGINT * 3\n"
+ send_user "\nSent SIGINT * 2\n"
exp_continue
}
-re "Received signal" {
diff --git a/testsuite/expect/test7.9 b/testsuite/expect/test7.9
index b497260..467bf60 100755
--- a/testsuite/expect/test7.9
+++ b/testsuite/expect/test7.9
@@ -74,6 +74,11 @@
set invalid 1
exp_continue
}
+ -re "pmi2" {
+ send_user "\nWARNING: test incompatible with mpi/pmi2\n"
+ set invalid 1
+ exp_continue
+ }
timeout {
send_user "\nFAILURE: scontrol not responding\n"
set exit_code 1
diff --git a/testsuite/expect/test8.1 b/testsuite/expect/test8.1
index 54a4e05..b83cb77 100755
--- a/testsuite/expect/test8.1
+++ b/testsuite/expect/test8.1
@@ -56,8 +56,14 @@
if {![string compare $type "Q"]} {
set geometry "1x1x1x1"
+ set expect_geo $geometry
+
+ if {[get_bluegene_allow_sub_blocks] == 1} {
+ set expect_geo "4x4x4x4x2"
+ }
} else {
set geometry "1x1x1"
+ set expect_geo $geometry
}
#
@@ -103,6 +109,15 @@
}
#
+# Wait for job to start
+#
+if {[wait_for_job $job_id "RUNNING"] != 0} {
+ send_user "\nFAILURE: waiting for job to start\n"
+ cancel_job $job_id
+ exit 1
+}
+
+#
# Confirm parameters passed into SLURM
#
set matches 0
@@ -116,7 +131,7 @@
incr matches
exp_continue
}
- -re "Geometry=$geometry" {
+ -re "Geometry=$expect_geo" {
incr matches
exp_continue
}
@@ -167,6 +182,15 @@
}
#
+# Wait for job to start
+#
+if {[wait_for_job $job_id "RUNNING"] != 0} {
+ send_user "\nFAILURE: waiting for job to start\n"
+ cancel_job $job_id
+ exit 1
+}
+
+#
# Confirm parameters passed into SLURM
#
set matches 0
@@ -180,7 +204,7 @@
incr matches
exp_continue
}
- -re "Geometry=$geometry" {
+ -re "Geometry=$expect_geo" {
incr matches
exp_continue
}
diff --git a/testsuite/expect/test8.2 b/testsuite/expect/test8.2
index 3207fd3..9d56760 100755
--- a/testsuite/expect/test8.2
+++ b/testsuite/expect/test8.2
@@ -57,12 +57,22 @@
global env
set env(SBATCH_CONN_TYPE) torus
set env(SBATCH_NO_ROTATE) 1
+set conn_letter "Tt"
+
if {![string compare $type "Q"]} {
- set env(SBATCH_GEOMETRY) 1x1x1x1
+ set geometry "1x1x1x1"
+ set expect_geo $geometry
+
+ if {[get_bluegene_allow_sub_blocks] == 1} {
+ set expect_geo "4x4x4x4x2"
+ }
} else {
- set env(SBATCH_GEOMETRY) 1x1x1
+ set geometry "1x1x1"
+ set expect_geo $geometry
}
+set env(SBATCH_GEOMETRY) $geometry
+
#
# Delete left-over input script files
# Build input script file
@@ -103,7 +113,7 @@
set matches 0
spawn $scontrol show job $job_id
expect {
- -re "Connection=\[Tt\]" {
+ -re "Connection=\[$conn_letter\]" {
incr matches
exp_continue
}
@@ -111,7 +121,7 @@
incr matches
exp_continue
}
- -re "Geometry=1x1x1" {
+ -re "Geometry=$expect_geo" {
incr matches
exp_continue
}
diff --git a/testsuite/expect/test8.20 b/testsuite/expect/test8.20
index 28a243e..a2cef6f 100755
--- a/testsuite/expect/test8.20
+++ b/testsuite/expect/test8.20
@@ -35,14 +35,19 @@
set exit_code 0
set job_id 0
set job_size 32
+set prompt "PROMPT: "
print_header $test_id
if {([test_bluegene] == 0) || [string compare [get_bluegene_type] "Q"]} {
send_user "\nWARNING: This test is only compatable with Bluegene/Q systems\n"
exit $exit_code
+} elseif (![test_emulated]) {
+ send_user "\nWARNING: This test will only work on an emulated Blugene/Q system."
+ exit $exit_code
}
+
#
# Spawn a job via salloc
#
@@ -52,9 +57,14 @@
expect {
-re "Granted job allocation ($number)" {
set job_id $expect_out(1,string)
+ send "export PS1=\"$prompt\"\r"
exp_continue
}
- -re $prompt {
+ -re "\"$prompt" {
+ # skip this, just echo of setting prompt"
+ exp_continue
+ }
+ -re "$prompt" {
#send_user "Job initiated\n"
}
timeout {
@@ -79,7 +89,7 @@
set job_start1 -1
send "$scontrol show job $job_id\r"
expect {
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set job_prefix $expect_out(1,string)
set job_start1 $expect_out(2,string)
set job_start2 $expect_out(3,string)
@@ -93,7 +103,7 @@
set job_fini5 $expect_out(11,string)
exp_continue
}
- -re $prompt {
+ -re "$prompt" {
#break
}
timeout {
@@ -108,10 +118,10 @@
}
send_user "\nJob allocation\n"
send_user "prefix: $job_prefix\n"
-send_user "dim 1: $job_start1 to $job_fini1 "
-send_user "dim 2: $job_start2 to $job_fini2 "
-send_user "dim 3: $job_start3 to $job_fini3 "
-send_user "dim 4: $job_start4 to $job_fini4 "
+send_user "dim 1: $job_start1 to $job_fini1\n"
+send_user "dim 2: $job_start2 to $job_fini2\n"
+send_user "dim 3: $job_start3 to $job_fini3\n"
+send_user "dim 4: $job_start4 to $job_fini4\n"
send_user "dim 5: $job_start5 to $job_fini5\n"
set job_dim1 [expr $job_fini1 - $job_start1 + 1]
@@ -146,8 +156,10 @@
send_user "\nRunning step $job_id.$step_id at size $step_size\n"
set step_start1 -1
send "$srun -N$step_size --test-only $scontrol show step $job_id.$step_id\r"
+ # sleep here just to make sure we get the scontrol output from the srun
+ sleep .25
expect {
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set step_prefix $expect_out(1,string)
set step_start1 $expect_out(2,string)
set step_start2 $expect_out(3,string)
@@ -161,7 +173,7 @@
set step_fini5 $expect_out(11,string)
exp_continue
}
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)" {
set step_prefix $expect_out(1,string)
set step_start1 $expect_out(2,string)
set step_start2 $expect_out(3,string)
@@ -175,7 +187,7 @@
set step_fini5 $step_start5
exp_continue
}
- -re $prompt {
+ -re "$prompt" {
#break
}
timeout {
@@ -191,10 +203,10 @@
send_user "\nStep allocation\n"
send_user "prefix: $step_prefix\n"
- send_user "dim 1: $step_start1 to $step_fini1 "
- send_user "dim 2: $step_start2 to $step_fini2 "
- send_user "dim 3: $step_start3 to $step_fini3 "
- send_user "dim 4: $step_start4 to $step_fini4 "
+ send_user "dim 1: $step_start1 to $step_fini1\n"
+ send_user "dim 2: $step_start2 to $step_fini2\n"
+ send_user "dim 3: $step_start3 to $step_fini3\n"
+ send_user "dim 4: $step_start4 to $step_fini4\n"
send_user "dim 5: $step_start5 to $step_fini5\n"
set step_dim1 [expr $step_fini1 - $step_start1 + 1]
diff --git a/testsuite/expect/test8.21 b/testsuite/expect/test8.21
index e11af44..b59c0cc 100755
--- a/testsuite/expect/test8.21
+++ b/testsuite/expect/test8.21
@@ -36,6 +36,7 @@
set file_prog "test$test_id.bash"
set job_id 0
set job_size 32
+set prompt "PROMPT: "
print_header $test_id
@@ -43,6 +44,9 @@
if {([test_bluegene] == 0) || [string compare [get_bluegene_type] "Q"]} {
send_user "\nWARNING: This test is only compatable with Bluegene/Q systems\n"
exit $exit_code
+} elseif (![test_emulated]) {
+ send_user "\nWARNING: This test will only work on an emulated Blugene/Q system."
+ exit $exit_code
}
#
@@ -54,9 +58,14 @@
expect {
-re "Granted job allocation ($number)" {
set job_id $expect_out(1,string)
+ send "export PS1=\"$prompt\"\r"
exp_continue
}
- -re $prompt {
+ -re "\"$prompt" {
+ # skip this, just echo of setting prompt"
+ exp_continue
+ }
+ -re "$prompt" {
#send_user "Job initiated\n"
}
timeout {
@@ -81,7 +90,7 @@
set job_start1 -1
send "$scontrol show job $job_id\r"
expect {
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set job_prefix $expect_out(1,string)
set job_start1 $expect_out(2,string)
set job_start2 $expect_out(3,string)
@@ -95,7 +104,7 @@
set job_fini5 $expect_out(11,string)
exp_continue
}
- -re $prompt {
+ -re "$prompt" {
#break
}
timeout {
@@ -110,10 +119,10 @@
}
send_user "\nJob allocation\n"
send_user "prefix: $job_prefix\n"
-send_user "dim 1: $job_start1 to $job_fini1 "
-send_user "dim 2: $job_start2 to $job_fini2 "
-send_user "dim 3: $job_start3 to $job_fini3 "
-send_user "dim 4: $job_start4 to $job_fini4 "
+send_user "dim 1: $job_start1 to $job_fini1\n"
+send_user "dim 2: $job_start2 to $job_fini2\n"
+send_user "dim 3: $job_start3 to $job_fini3\n"
+send_user "dim 4: $job_start4 to $job_fini4\n"
send_user "dim 5: $job_start5 to $job_fini5\n"
set job_dim1 [expr $job_fini1 - $job_start1 + 1]
@@ -152,7 +161,7 @@
set timeout 60
send "./$file_prog $srun $squeue $job_id $actual_job_size 1\r"
expect {
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set step_prefix $expect_out(1,string)
set step_start1 $expect_out(2,string)
set step_start2 $expect_out(3,string)
@@ -172,7 +181,7 @@
if [info exists use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)] {
incr use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)
} else {
- send_user "\nFAILURE: invalid step c-node allocation at "
+ send_user "\nFAILURE: 1 invalid step c-node allocation at "
send_user "\[$dim1,$dim2,$dim3,$dim4,$dim5\]/"
set exit_code 1
}
@@ -183,7 +192,7 @@
}
exp_continue
}
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)" {
set step_prefix $expect_out(1,string)
set dim1 $expect_out(2,string)
set dim2 $expect_out(3,string)
@@ -193,13 +202,13 @@
if [info exists use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)] {
incr use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)
} else {
- send_user "\nFAILURE: invalid step c-node allocation at "
+ send_user "\nFAILURE: 2 invalid step c-node allocation at "
send_user "\[$dim1,$dim2,$dim3,$dim4,$dim5\]\n"
set exit_code 1
}
exp_continue
}
- -re $prompt {
+ -re "$prompt" {
#break
}
timeout {
@@ -217,7 +226,7 @@
for {set dim4 $job_start4} {$dim4 <= $job_fini4} {incr dim4} {
for {set dim5 $job_start5} {$dim5 <= $job_fini5} {incr dim5} {
if {$use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) != 1} {
- send_user "\nFAILURE: c-node at \[$dim1,$dim2,$dim3,$dim4,$dim5\] "
+ send_user "\nFAILURE: 3 c-node at \[$dim1,$dim2,$dim3,$dim4,$dim5\] "
send_user "allocated $use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) times\n"
set exit_code 1
}
@@ -253,7 +262,7 @@
set timeout 60
send "./$file_prog $srun $squeue $job_id $actual_job_size 2\r"
expect {
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set step_prefix $expect_out(1,string)
set step_start1 $expect_out(2,string)
set step_start2 $expect_out(3,string)
@@ -273,7 +282,7 @@
if [info exists use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)] {
incr use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)
} else {
- send_user "\nFAILURE: invalid step c-node allocation at "
+ send_user "\nFAILURE: 4 invalid step c-node allocation at "
send_user "\[$dim1,$dim2,$dim3,$dim4,$dim5\]/"
set exit_code 1
}
@@ -284,7 +293,7 @@
}
exp_continue
}
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)" {
set step_prefix $expect_out(1,string)
set dim1 $expect_out(2,string)
set dim2 $expect_out(3,string)
@@ -294,13 +303,13 @@
if [info exists use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)] {
incr use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)
} else {
- send_user "\nFAILURE: invalid step c-node allocation at "
+ send_user "\nFAILURE: 5 invalid step c-node allocation at "
send_user "\[$dim1,$dim2,$dim3,$dim4,$dim5\]\n"
set exit_code 1
}
exp_continue
}
- -re $prompt {
+ -re "$prompt" {
send_user "\nNOTE: Step create errors due to busy nodes are expected\n"
send "exit\r"
exp_continue
@@ -320,7 +329,7 @@
for {set dim4 $job_start4} {$dim4 <= $job_fini4} {incr dim4} {
for {set dim5 $job_start5} {$dim5 <= $job_fini5} {incr dim5} {
if {$use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) > 1} {
- send_user "\nFAILURE: c-node at \[$dim1,$dim2,$dim3,$dim4,$dim5\] "
+ send_user "\nFAILURE: 6 c-node at \[$dim1,$dim2,$dim3,$dim4,$dim5\] "
send_user "allocated $use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) times\n"
set exit_code 1
}
diff --git a/testsuite/expect/test8.21.bash b/testsuite/expect/test8.21.bash
index c6494bc..206759c 100755
--- a/testsuite/expect/test8.21.bash
+++ b/testsuite/expect/test8.21.bash
@@ -40,4 +40,4 @@
$srun -N1 --test-only sleep 50 &
sleep 5
-$squeue --jobs=$job_id --steps --noheader --format='Step_ID=%i BP_List=%N'
+$squeue --jobs=$job_id --steps --noheader --format='Step_ID=%i MidplaneList=%N'
diff --git a/testsuite/expect/test8.22 b/testsuite/expect/test8.22
index e6dd5b4..ea029d0 100755
--- a/testsuite/expect/test8.22
+++ b/testsuite/expect/test8.22
@@ -34,9 +34,12 @@
set test_id "8.22"
set exit_code 0
set file_in "test$test_id.in"
+set test_prog "test$test_id.prog"
set job_id 0
set job_size 32
-
+set prompt "PROMPT: "
+set mpicc "/usr/local/bin/mpixlc_r"
+set mpicc_opts "-q64"
print_header $test_id
@@ -46,6 +49,20 @@
}
#
+# Test for existence of mpi compiler
+#
+if {![info exists mpicc] || ![file executable $mpicc]} {
+ set mpicc $bin_cc
+ set mpicc_opts "-m64"
+}
+
+#
+# Delete left-over program and rebuild it
+#
+exec $bin_rm -f $test_prog ${test_prog}.o
+exec $mpicc $mpicc_opts -o $test_prog ${test_prog}.c
+
+#
# Spawn a job via salloc
#
set matches 0
@@ -54,9 +71,14 @@
expect {
-re "Granted job allocation ($number)" {
set job_id $expect_out(1,string)
+ send "export PS1=\"$prompt\"\r"
exp_continue
}
- -re $prompt {
+ -re "\"$prompt" {
+ # skip this, just echo of setting prompt"
+ exp_continue
+ }
+ -re "$prompt" {
#send_user "Job initiated\n"
}
timeout {
@@ -80,7 +102,7 @@
set job_start1 -1
send "$scontrol show job $job_id\r"
expect {
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set job_prefix $expect_out(1,string)
set job_start1 $expect_out(2,string)
set job_start2 $expect_out(3,string)
@@ -94,7 +116,7 @@
set job_fini5 $expect_out(11,string)
exp_continue
}
- -re $prompt {
+ -re "$prompt" {
#break
}
timeout {
@@ -135,15 +157,15 @@
make_bash_script $file_in "
for ((inx=0; inx<$actual_job_size; inx++)) ; do
- $srun -N4 sleep 1 &
- $srun -N2 sleep 1 &
- $srun -N1 sleep 1 &
+ $srun -N4 $test_prog 1 &
+ $srun -N2 $test_prog 1 &
+ $srun -N1 $test_prog 1 &
done
wait"
send "./$file_in\r"
expect {
- -re $prompt {
+ -re "$prompt" {
send "exit\r"
exp_continue
}
@@ -154,7 +176,7 @@
}
if {$exit_code == 0} {
- exec rm -f $file_in
+ exec rm -f $file_in $test_prog
send_user "\nSUCCESS\n"
} else {
cancel_job $job_id
diff --git a/testsuite/expect/test8.22.prog.c b/testsuite/expect/test8.22.prog.c
new file mode 100644
index 0000000..ffdb2bb
--- /dev/null
+++ b/testsuite/expect/test8.22.prog.c
@@ -0,0 +1,14 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+
+ if (argc == 2) {
+ sleep(strtol(argv[1], NULL, 0));
+ } else {
+ fprintf(stderr, "usage %s <seconds>\n", argv[0]);
+ }
+
+ return(0);
+}
diff --git a/testsuite/expect/test8.23 b/testsuite/expect/test8.23
index c361c00..b7c2b8b 100755
--- a/testsuite/expect/test8.23
+++ b/testsuite/expect/test8.23
@@ -84,9 +84,9 @@
exit 1
}
set job_start(1,1) -1
-spawn $squeue --jobs=$job_id(1) --noheader -o "%i BP_List=%N"
+spawn $squeue --jobs=$job_id(1) --noheader -o "%i MidplaneList=%N"
expect {
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set job_prefix(1) $expect_out(1,string)
set job_start(1,1) $expect_out(2,string)
set job_start(1,2) $expect_out(3,string)
@@ -174,9 +174,9 @@
sleep 15
for {set inx 2} {$inx <= $job_count} {incr inx} {
set job_start($inx,1) -1
- spawn $squeue --jobs=$job_id($inx) --noheader -o "%i BP_List=%N"
+ spawn $squeue --jobs=$job_id($inx) --noheader -o "%i MidplaneList=%N"
expect {
- -re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+ -re "MidplaneList=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set job_prefix($inx) $expect_out(1,string)
set job_start($inx,1) $expect_out(2,string)
set job_start($inx,2) $expect_out(3,string)
diff --git a/testsuite/expect/test8.24 b/testsuite/expect/test8.24
new file mode 100755
index 0000000..ef2379f
--- /dev/null
+++ b/testsuite/expect/test8.24
@@ -0,0 +1,394 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+# Bluegene/Q only: Test advanced reservation creation with mutliple
+# blocks.
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2009-2011 Lawrence Livermore National Security
+# Portions copyright (C) 2011 SchedMD LLC
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Dave Bremer <dbremer@llnl.gov>, Morris Jette <jette@schedmd.com>
+# CODE-OCEC-09-009. All rights reserved.
+#
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "8.24"
+set exit_code 0
+set res_name ""
+set res_name_save ""
+set user_name ""
+set def_partition ""
+set def_node ""
+set ii 0
+
+print_header $test_id
+
+if {([test_bluegene] == 0) || [string compare [get_bluegene_type] "Q"]} {
+ send_user "\nWARNING: This test is only compatable with Bluegene/Q systems\n"
+ exit $exit_code
+}
+
+#
+# Procedure to create a new reservation and validate it.
+# Modifies the global var res_name in the process
+#
+proc create_res { res_params failure_expected node_cnt_expected} {
+ #exp_internal 1
+ global alpha_numeric_under
+ global exit_code
+ global res_name
+ global scontrol
+
+ set node_cnt_match 0
+ set ret_code 0
+ set res_name ""
+
+ #
+ # Create a reservation using the list of params in res_params
+ #
+ set arglist [linsert $res_params 0 $scontrol create res]
+ eval spawn $arglist
+ expect {
+ -re "Reservation created: ($alpha_numeric_under)" {
+ set res_name $expect_out(1,string)
+ }
+ -re "Error creating the reservation: Invalid user" {
+ if {!$failure_expected} {
+ send_user "\nFAILURE: user not authorized "
+ send_user "to create reservation\n"
+ }
+ set ret_code 1
+ exp_continue
+ }
+ -nocase "Error" {
+ if {!$failure_expected} {
+ send_user "\nFAILURE: problem creating "
+ send_user "reservation with args: $res_params\n"
+ }
+ set ret_code 1
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: scontrol not responding\n"
+ set ret_code 1
+ }
+ eof {
+ wait
+ }
+ }
+ if { $ret_code != 0 } {
+ return $ret_code
+ }
+
+ spawn $scontrol show res $res_name
+ expect {
+ -re "ReservationName=($alpha_numeric_under)" {
+ set tmp_res_name $expect_out(1,string)
+ if {$tmp_res_name != $res_name} {
+ if {!$failure_expected} {
+ send_user "\nFAILURE: problem showing "
+ send_user "reservation created with: "
+ send_user "$res_params\n"
+ }
+ set ret_code 1
+ }
+ exp_continue
+ }
+ -re "NodeCnt=$node_cnt_expected" {
+ set node_cnt_match 1
+ }
+ -re "No reservations in the system" {
+ if {!$failure_expected} {
+ send_user "\nFAILURE: no reservations found "
+ send_user "after reservation created with: "
+ send_user "$res_params\n"
+ }
+ set ret_code 1
+ exp_continue
+ }
+ -re "Reservation ($alpha_numeric_under) not found" {
+ if {!$failure_expected} {
+ send_user "\nFAILURE: Reservation $res_name not"
+ send_user "found after reservation created "
+ send_user "with: $res_params\n"
+ }
+ set ret_code 1
+ exp_continue
+ }
+ timeout {
+ send_user "\nFAILURE: scontrol not responding\n"
+ set ret_code 1
+ }
+ eof {
+ wait
+ }
+ }
+ if {$node_cnt_expected == 0 || $node_cnt_match == 0} {
+ send_user "\nFAILURE: Expected node count of "
+ send_user "$node_cnt_expected\n"
+ set exit_code 1
+ }
+ #exp_internal 0
+
+ return $ret_code
+}
+
+
+#
+# Procedure to delete a reservation
+#
+proc delete_res { res_name } {
+ global scontrol
+ set ret_code 0
+
+ spawn $scontrol delete ReservationName=$res_name
+ expect {
+ -re "invalid" {
+ send_user "\nFAILURE: problem deleting reservation $res_name\n"
+ set ret_code 1
+ exp_continue
+ }
+ -re "reservation is in use" {
+ send_user "\nFAILURE: $res_name is in use\n"
+ set ret_code 1
+ exp_continue
+ }
+ }
+ return $ret_code
+}
+
+
+#
+# Identify usable nodes in default partition
+#
+spawn $sinfo -h -o %32P
+expect {
+ -re "($alpha_numeric_under)(\\*)" {
+ set def_partition $expect_out(1,string)
+ exp_continue
+ }
+ eof {
+ wait
+ }
+}
+if {[string compare $def_partition ""] == 0} {
+ send_user "\nFAILURE: failed to find default partition\n"
+ exit 1
+}
+spawn $sinfo -h -o "NodeList=%N NodeCnt=%D" -p $def_partition
+expect {
+ -re "NodeList=($alpha_numeric_nodelist)" {
+ set def_node $expect_out(1,string)
+ set def_node_cnt $expect_out(2,string)
+ exp_continue
+ }
+ -re "NodeCnt=($number)\.($number)K" {
+ set def_node_cnt $expect_out(1,string)
+ set def_node_cnt [expr $def_node_cnt * 1024 + 512]
+ exp_continue
+ }
+ -re "NodeCnt=($number)K" {
+ set def_node_cnt $expect_out(1,string)
+ set def_node_cnt [expr $def_node_cnt * 1024]
+ exp_continue
+ }
+ -re "NodeCnt=($number)" {
+ set def_node_cnt $expect_out(1,string)
+ exp_continue
+ }
+ eof {
+ wait
+ }
+}
+if {[string compare $def_node ""] == 0} {
+ send_user "\nFAILURE: default partition seems to have no nodes\n"
+ exit 1
+}
+send_user "\nNodeCnt=$def_node_cnt\n"
+
+#
+# Get the user name
+#
+spawn $bin_id -un
+expect {
+ -re "($alpha_numeric_under)" {
+ set user_name $expect_out(1,string)
+ }
+ eof {
+ wait
+ }
+}
+
+
+#
+# Make a list of lists with a series of parameters to test. All the tests
+# in goodtests should pass, all those in badtests should fail.
+#
+send_user "\nTESTING invalid reservation\n===========================\n"
+set badtests "
+ {StartTime=now Duration=5 NodeCnt=1,A}
+ {StartTime=now Duration=5 NodeCnt=A,B}
+ {StartTime=now Duration=5 NodeCnt=1b,32}
+"
+
+foreach test $badtests {
+ set ret_code [create_res $test 1 0]
+ if {$ret_code == 0} {
+ send_user "\nFAILURE: Reservation $test did not fail but should have\n"
+ delete_res $res_name
+ exit 1
+ } else {
+ send_user "Expected error. You can turn that frown upside-down.\n"
+ }
+}
+
+if {[test_super_user] == 0} {
+ send_user "\nWARNING: Can not test more unless SlurmUser or root\n"
+ exit $exit_code
+}
+if {[string compare [get_bluegene_layout] Dynamic]} {
+ send_user "\nWARNING: Can not test more unless dynamic bluegene system\n"
+ exit $exit_code
+}
+
+if {$def_node_cnt >= 512} {
+ send_user "\nTESTING 512 node reservation\n============================\n"
+ set goodtests "
+ {StartTime=now Duration=5 User=$user_name NodeCnt=32,32,32,32,32}
+ {StartTime=now Duration=5 User=$user_name NodeCnt=2}
+ {StartTime=now Duration=5 User=$user_name NodeCnt=32,2,32}
+ "
+ set good_size(0) "512"
+ set good_size(1) "512"
+ set good_size(2) "512"
+
+ set inx 0
+ foreach test $goodtests {
+ set ret_code [create_res $test 0 $good_size($inx)]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+ set ret_code [delete_res $res_name]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to delete a reservation\n"
+ exit $ret_code
+ }
+ incr inx
+ }
+}
+spawn $scontrol show res
+
+if {$def_node_cnt >= 1024} {
+ send_user "\nTESTING 1024 node reservation\n=============================\n"
+ set goodtests "
+ {StartTime=now Duration=5 User=$user_name NodeCnt=32,32,512,32,32,32}
+ {StartTime=now Duration=5 User=$user_name NodeCnt=800}
+ {StartTime=now Duration=5 User=$user_name NodeCnt=32,500,32}
+ "
+ set good_size(0) "1024"
+ set good_size(1) "1024"
+ set good_size(2) "1024"
+
+ set inx 0
+ foreach test $goodtests {
+ set ret_code [create_res $test 0 $good_size($inx)]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+ set ret_code [delete_res $res_name]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to delete a reservation\n"
+ exit $ret_code
+ }
+ incr inx
+ }
+}
+spawn $scontrol show res
+
+if {$def_node_cnt >= 2048} {
+ send_user "\nTESTING 2048 node reservation\n=============================\n"
+ set goodtests "
+ {StartTime=now Duration=5 User=$user_name NodeCnt=2000}
+ {StartTime=now Duration=5 User=$user_name NodeCnt=1k,1k}
+ {StartTime=now Duration=5 User=$user_name NodeCnt=32,1k,32}
+ "
+ set good_size(0) "2048"
+ set good_size(1) "2048"
+ set good_size(2) "1536"
+
+ set inx 0
+ foreach test $goodtests {
+ set ret_code [create_res $test 0 $good_size($inx)]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+ set ret_code [delete_res $res_name]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to delete a reservation\n"
+ exit $ret_code
+ }
+ incr inx
+ }
+}
+spawn $scontrol show res
+
+if {$def_node_cnt >= 4096} {
+ send_user "\nTESTING 4096 node reservation\n=============================\n"
+ set goodtests "
+ {StartTime=now Duration=5 User=$user_name NodeCnt=32,2k,32,1k,32,512}
+ {StartTime=now Duration=5 User=$user_name NodeCnt=3k,1k}
+ {StartTime=now Duration=5 User=$user_name NodeCnt=2k,2k}
+ {StartTime=now Duration=5 User=$user_name NodeCnt=1k,1k,1k,1k}
+ "
+ set good_size(0) "4096"
+ set good_size(1) "4096"
+ set good_size(2) "4096"
+ set good_size(3) "4096"
+
+ set inx 0
+ foreach test $goodtests {
+ set ret_code [create_res $test 0 $good_size($inx)]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to create a valid reservation\n"
+ exit $ret_code
+ }
+ set ret_code [delete_res $res_name]
+ if {$ret_code != 0} {
+ send_user "\nFAILURE: Unable to delete a reservation\n"
+ exit $ret_code
+ }
+ incr inx
+ }
+}
+spawn $scontrol show res
+
+if {$exit_code == 0} {
+ send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test8.4 b/testsuite/expect/test8.4
index 689175f..a9aae6f7 100755
--- a/testsuite/expect/test8.4
+++ b/testsuite/expect/test8.4
@@ -44,14 +44,33 @@
set test_prog "test$test_id.prog"
set job_id 0
-# Blue Gene cross-compiler info
-set task_cnt 16
-set mpicc "/usr/local/bin/mpxlc"
-set mpicc_opts "-qarch=440d"
-set mpirun_opts "-nodes $task_cnt -exe [$bin_pwd]/$test_prog -cwd [$bin_pwd]"
print_header $test_id
+if {[test_bluegene] == 0} {
+ send_user "\nWARNING: This test is only compatable with bluegene systems\n"
+ exit $exit_code
+}
+
+set type [get_bluegene_type]
+
+if {$type == 0} {
+ send_user "\nFAILURE: No bluegene type found \n"
+ exit 1
+}
+
+set task_cnt 16
+if {![string compare $type "P"] || ![string compare $type "L"]} {
+ # Blue Gene cross-compiler info
+ set mpicc "/usr/local/bin/mpxlc"
+ set mpicc_opts "-qarch=440d"
+ set bg_exec "$mpirun -nodes $task_cnt -exe [$bin_pwd]/$test_prog -cwd [$bin_pwd]"
+} else {
+ set mpicc "/usr/local/bin/mpixlc_r"
+ set mpicc_opts "-q64"
+ set bg_exec "$srun -n $task_cnt $test_prog"
+}
+
#
# Test for existence of mpi compiler
#
@@ -81,7 +100,7 @@
exec $bin_rm -f $file_in $file_out $file_err
exec echo "#!$bin_bash" >$file_in
exec echo "$bin_env | $bin_grep MPIRUN_PARTITION" >>$file_in
-exec echo "$mpirun $mpirun_opts" >>$file_in
+exec echo "$bg_exec" >>$file_in
exec echo "$bin_echo TEST_COMPLETE" >>$file_in
exec $bin_chmod 700 $file_in
diff --git a/testsuite/expect/test8.5 b/testsuite/expect/test8.5
index 76cee2b..6ea9867 100755
--- a/testsuite/expect/test8.5
+++ b/testsuite/expect/test8.5
@@ -181,6 +181,13 @@
exit $exit_code
}
+set part_name [default_partition]
+set shared [partition_shared $part_name]
+if {[string compare $shared FORCE]} {
+ send_user "\nFAILURE: System configured Dynamic, but Partition $part_name has Shared=$shared\n"
+ exit 1
+}
+
set psets [get_bluegene_psets]
if {$psets == 0} {
diff --git a/testsuite/expect/test8.6 b/testsuite/expect/test8.6
index f5077f5..f921fb4 100755
--- a/testsuite/expect/test8.6
+++ b/testsuite/expect/test8.6
@@ -83,54 +83,6 @@
return $start_cnt
}
-# Wait up to 900 seconds for all jobs to terminate
-# Return 0 if all jobs done, remainin job count otherwise
-proc wait_for_all_jobs { } {
- global scancel squeue bin_sleep file_in
-
- set last_matches 0
- send_user "Waiting for all jobs to terminate\n"
- for {set inx 0} {$inx < 600} {incr inx} {
- set matches 0
- log_user 0
- spawn $squeue -o %j
- expect {
- -re "$file_in" {
- incr matches
- exp_continue
- }
- -re "error" {
- set matches -1
- }
- timeout {
- send_user "No response from squeue\n"
- set matches -1
- }
- eof {
- wait
- }
- }
- log_user 1
- if {$matches == 0} {
- send_user "All jobs complete\n"
- break
- }
- if {$matches > 0} {
- send_user " $matches jobs remaining\n"
-# Moab can slow throughput down a lot,
-# so don't return here
-# if {$matches == $last_matches} {
-# send_user "Running jobs hung\n"
-# break
-# }
-# set last_matches $matches
- exec $bin_sleep 15
- }
- }
- exec $scancel -n $file_in
- return $matches
-}
-
proc run_bgl_test { } {
global psets num_nodes
@@ -312,7 +264,7 @@
send_user "Started $started jobs\n"
-if {[wait_for_all_jobs] != 0} {
+if {[wait_for_all_jobs $file_in 1] != 0} {
send_user "\nFAILURE: some submitted jobs failed to terminate\n"
set exit_code 1
}
diff --git a/testsuite/expect/test9.9 b/testsuite/expect/test9.9
new file mode 100755
index 0000000..0d92145
--- /dev/null
+++ b/testsuite/expect/test9.9
@@ -0,0 +1,85 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Timing test for 2000 jobs.
+#
+# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+# "FAILURE: ..." otherwise with an explanation of the failure, OR
+# anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2012 SchedMD LLC
+# Written by Danny Auble <da@schedmd.com>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+############################################################################
+source ./globals
+
+set test_id "9.9"
+set exit_code 0
+set job_cnt 2000
+
+print_header $test_id
+
+# if {[test_front_end] != 0 || $enable_memory_leak_debug != 0} {
+# set job_cnt 2
+# }
+
+proc _submit_jobs { job_name test_file } {
+ global exit_code job_cnt sbatch srun number
+
+ log_user 0
+ for {set inx 0} {$inx < $job_cnt} {incr inx} {
+ spawn -nottyinit -nottycopy $sbatch -J $job_name -o $test_file --wrap $job_name
+ #spawn $srun -J $job_name -o $test_file $job_name
+ expect {
+# -re "Submitted batch job ($number)" {
+# exp_continue
+# }
+# -re "Unable to contact" {
+# send_user "\nFAILURE: slurm appears to be down\n"
+# exp_continue
+# }
+# timeout {
+# send_user "\nFAILURE: sbatch not responding\n"
+# slow_kill $sbatch_pid
+# set exit_code 1
+# }
+ eof {
+ wait
+ }
+ }
+ }
+ log_user 1
+ sleep 1
+ if {[wait_for_all_jobs $job_name 0] != 0} {
+ send_user "\nFAILURE: some submitted jobs failed to terminate\n"
+ set exit_code 1
+ }
+}
+
+set time_took [string trim [time {_submit_jobs "hostname" "/dev/null"}] " per iteration microseconds"]
+set time_took_sec [expr $time_took / 1000000]
+set time_per_job [expr $job_cnt / $time_took_sec]
+send_user "test 1 took $time_took microseconds == $time_took_sec seconds, $time_per_job jobs a sec\n"
+
+if { $exit_code != 0 } {
+ exit $exit_code
+}
+
+send_user "\nSUCCESS\n"
+exit $exit_code
diff --git a/testsuite/slurm_unit/Makefile.in b/testsuite/slurm_unit/Makefile.in
index 7681dd8..654b62e 100644
--- a/testsuite/slurm_unit/Makefile.in
+++ b/testsuite/slurm_unit/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -201,6 +201,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -237,6 +238,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -574,10 +576,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/testsuite/slurm_unit/api/Makefile.in b/testsuite/slurm_unit/api/Makefile.in
index e9bea9b..d00b4a6 100644
--- a/testsuite/slurm_unit/api/Makefile.in
+++ b/testsuite/slurm_unit/api/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -224,6 +224,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -260,6 +261,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -396,7 +398,7 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-api-test$(EXEEXT): $(api_test_OBJECTS) $(api_test_DEPENDENCIES)
+api-test$(EXEEXT): $(api_test_OBJECTS) $(api_test_DEPENDENCIES) $(EXTRA_api_test_DEPENDENCIES)
@rm -f api-test$(EXEEXT)
$(LINK) $(api_test_OBJECTS) $(api_test_LDADD) $(LIBS)
@@ -651,14 +653,15 @@
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
@@ -737,10 +740,15 @@
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/testsuite/slurm_unit/api/manual/Makefile.in b/testsuite/slurm_unit/api/manual/Makefile.in
index 8297372..07f19c5 100644
--- a/testsuite/slurm_unit/api/manual/Makefile.in
+++ b/testsuite/slurm_unit/api/manual/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -216,6 +216,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -252,6 +253,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -385,28 +387,28 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-cancel-tst$(EXEEXT): $(cancel_tst_OBJECTS) $(cancel_tst_DEPENDENCIES)
+cancel-tst$(EXEEXT): $(cancel_tst_OBJECTS) $(cancel_tst_DEPENDENCIES) $(EXTRA_cancel_tst_DEPENDENCIES)
@rm -f cancel-tst$(EXEEXT)
$(LINK) $(cancel_tst_OBJECTS) $(cancel_tst_LDADD) $(LIBS)
-complete-tst$(EXEEXT): $(complete_tst_OBJECTS) $(complete_tst_DEPENDENCIES)
+complete-tst$(EXEEXT): $(complete_tst_OBJECTS) $(complete_tst_DEPENDENCIES) $(EXTRA_complete_tst_DEPENDENCIES)
@rm -f complete-tst$(EXEEXT)
$(LINK) $(complete_tst_OBJECTS) $(complete_tst_LDADD) $(LIBS)
-job_info-tst$(EXEEXT): $(job_info_tst_OBJECTS) $(job_info_tst_DEPENDENCIES)
+job_info-tst$(EXEEXT): $(job_info_tst_OBJECTS) $(job_info_tst_DEPENDENCIES) $(EXTRA_job_info_tst_DEPENDENCIES)
@rm -f job_info-tst$(EXEEXT)
$(LINK) $(job_info_tst_OBJECTS) $(job_info_tst_LDADD) $(LIBS)
-node_info-tst$(EXEEXT): $(node_info_tst_OBJECTS) $(node_info_tst_DEPENDENCIES)
+node_info-tst$(EXEEXT): $(node_info_tst_OBJECTS) $(node_info_tst_DEPENDENCIES) $(EXTRA_node_info_tst_DEPENDENCIES)
@rm -f node_info-tst$(EXEEXT)
$(LINK) $(node_info_tst_OBJECTS) $(node_info_tst_LDADD) $(LIBS)
-partition_info-tst$(EXEEXT): $(partition_info_tst_OBJECTS) $(partition_info_tst_DEPENDENCIES)
+partition_info-tst$(EXEEXT): $(partition_info_tst_OBJECTS) $(partition_info_tst_DEPENDENCIES) $(EXTRA_partition_info_tst_DEPENDENCIES)
@rm -f partition_info-tst$(EXEEXT)
$(LINK) $(partition_info_tst_OBJECTS) $(partition_info_tst_LDADD) $(LIBS)
-reconfigure-tst$(EXEEXT): $(reconfigure_tst_OBJECTS) $(reconfigure_tst_DEPENDENCIES)
+reconfigure-tst$(EXEEXT): $(reconfigure_tst_OBJECTS) $(reconfigure_tst_DEPENDENCIES) $(EXTRA_reconfigure_tst_DEPENDENCIES)
@rm -f reconfigure-tst$(EXEEXT)
$(LINK) $(reconfigure_tst_OBJECTS) $(reconfigure_tst_LDADD) $(LIBS)
-submit-tst$(EXEEXT): $(submit_tst_OBJECTS) $(submit_tst_DEPENDENCIES)
+submit-tst$(EXEEXT): $(submit_tst_OBJECTS) $(submit_tst_DEPENDENCIES) $(EXTRA_submit_tst_DEPENDENCIES)
@rm -f submit-tst$(EXEEXT)
$(LINK) $(submit_tst_OBJECTS) $(submit_tst_LDADD) $(LIBS)
-update_config-tst$(EXEEXT): $(update_config_tst_OBJECTS) $(update_config_tst_DEPENDENCIES)
+update_config-tst$(EXEEXT): $(update_config_tst_OBJECTS) $(update_config_tst_DEPENDENCIES) $(EXTRA_update_config_tst_DEPENDENCIES)
@rm -f update_config-tst$(EXEEXT)
$(LINK) $(update_config_tst_OBJECTS) $(update_config_tst_LDADD) $(LIBS)
@@ -549,10 +551,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/testsuite/slurm_unit/common/Makefile.in b/testsuite/slurm_unit/common/Makefile.in
index 2c4ffed..584def0 100644
--- a/testsuite/slurm_unit/common/Makefile.in
+++ b/testsuite/slurm_unit/common/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -205,6 +205,7 @@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_DIR = @MUNGE_DIR@
MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
MUNGE_LIBS = @MUNGE_LIBS@
MYSQL_CFLAGS = @MYSQL_CFLAGS@
@@ -241,6 +242,7 @@
READLINE_LIBS = @READLINE_LIBS@
REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@
RELEASE = @RELEASE@
+RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@
SED = @SED@
SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
@@ -380,16 +382,16 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-bitstring-test$(EXEEXT): $(bitstring_test_OBJECTS) $(bitstring_test_DEPENDENCIES)
+bitstring-test$(EXEEXT): $(bitstring_test_OBJECTS) $(bitstring_test_DEPENDENCIES) $(EXTRA_bitstring_test_DEPENDENCIES)
@rm -f bitstring-test$(EXEEXT)
$(LINK) $(bitstring_test_OBJECTS) $(bitstring_test_LDADD) $(LIBS)
-log-test$(EXEEXT): $(log_test_OBJECTS) $(log_test_DEPENDENCIES)
+log-test$(EXEEXT): $(log_test_OBJECTS) $(log_test_DEPENDENCIES) $(EXTRA_log_test_DEPENDENCIES)
@rm -f log-test$(EXEEXT)
$(LINK) $(log_test_OBJECTS) $(log_test_LDADD) $(LIBS)
-pack-test$(EXEEXT): $(pack_test_OBJECTS) $(pack_test_DEPENDENCIES)
+pack-test$(EXEEXT): $(pack_test_OBJECTS) $(pack_test_DEPENDENCIES) $(EXTRA_pack_test_DEPENDENCIES)
@rm -f pack-test$(EXEEXT)
$(LINK) $(pack_test_OBJECTS) $(pack_test_LDADD) $(LIBS)
-runqsw$(EXEEXT): $(runqsw_OBJECTS) $(runqsw_DEPENDENCIES)
+runqsw$(EXEEXT): $(runqsw_OBJECTS) $(runqsw_DEPENDENCIES) $(EXTRA_runqsw_DEPENDENCIES)
@rm -f runqsw$(EXEEXT)
$(LINK) $(runqsw_OBJECTS) $(runqsw_LDADD) $(LIBS)
@@ -564,14 +566,15 @@
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
@@ -621,10 +624,15 @@
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic: