/*
* $Id$
* $Source$
*
* Demo the routines in common/qsw.c
* This can run mping on the local node (uses shared memory comms).
* ./runqsw /usr/lib/mpi-test/mping 1 1024
*/
#include <sys/types.h>
#include <sys/wait.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "src/plugins/switch/elan/qsw.h"
#include "slurm/slurm_errno.h"
#include "src/common/slurm_xlator.h"
/* Boolean option to pack/unpack jobinfo struct
* (good test for qsw pack routines)
*/
bool pack_jobinfo = false;
/*
* Set a variable in the callers environment. Args are printf style.
* XXX Space is allocated on the heap and will never be reclaimed.
* Example: setenvf("RMS_RANK=%d", rank);
*/
static int
setenvf(const char *fmt, ...)
{
va_list ap;
char buf[BUFSIZ];
char *bufcpy;
va_start(ap, fmt);
vsnprintf(buf, sizeof(buf), fmt, ap);
va_end(ap);
bufcpy = strdup(buf);
if (bufcpy == NULL)
return -1;
return putenv(bufcpy);
}
/*
* Set environment variables needed by QSW MPICH / libelan.
*/
static int
do_env(int nodeid, int procid, int nprocs)
{
if (setenvf("RMS_RANK=%d", procid) < 0)
return -1;
if (setenvf("RMS_NODEID=%d", nodeid) < 0)
return -1;
if (setenvf("RMS_PROCID=%d", procid) < 0)
return -1;
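/* Single-node demo, so the node count is hard-wired to 1. */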
if (setenvf("RMS_NNODES=%d", 1) < 0)
return -1;
if (setenvf("RMS_NPROCS=%d", nprocs) < 0)
return -1;
return 0;
}
/*
* Set up and run 'nprocs' copies of the parallel job.
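* Process tree:
*   Process 1 forks Process 2, waits for it, then destroys the Elan
*     program description (qsw_prgdestroy).
*   Process 2 creates the program description (qsw_prog_init), forks
*     one copy of Process 3 per task, and reaps them.
*   Process 3 assigns the task its Elan capability (qsw_setcap),
*     sets the RMS_* environment, forks Process 4, and waits for it.
*   Process 4 drops to the target uid and execs the command.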
*/
void
slurmd(qsw_jobinfo_t job, uid_t uid, int nodeid, int nprocs, char *cmdbuf)
{
pid_t pid;
pid_t cpid[QSW_MAX_TASKS];
int i;
/* Process 1: */
switch ((pid = fork())) {
case -1:
slurm_perror("fork");
exit(1);
case 0: /* child falls thru */
break;
default: /* parent */
if (waitpid(pid, NULL, 0) < 0) {
slurm_perror("wait");
exit(1);
}
if (qsw_prgdestroy(job) < 0) {
slurm_perror("qsw_prgdestroy");
exit(1);
}
exit(0);
}
/* Process 2: */
if (qsw_prog_init(job, uid) < 0) {
slurm_perror("qsw_prog_init");
exit(1);
}
for (i = 0; i < nprocs; i++) {
cpid[i] = fork();
if (cpid[i] < 0) {
slurm_perror("fork");
exit(1);
} else if (cpid[i] == 0)
break;
}
/* still in parent */
if (i == nprocs) {
int waiting = nprocs;
int j;
while (waiting > 0) {
pid = waitpid(0, NULL, 0);
if (pid < 0) {
slurm_perror("waitpid");
exit(1);
}
for (j = 0; j < nprocs; j++) {
if (cpid[j] == pid)
waiting--;
}
}
exit(0);
}
/* Process 3: (there are nprocs instances of us) */
if (qsw_setcap(job, i) < 0) {
slurm_perror("qsw_setcap");
exit(1);
}
if (do_env(nodeid, i, nprocs) < 0) {
slurm_perror("do_env");
exit(1);
}
pid = fork();
switch (pid) {
case -1: /* error */
slurm_perror("fork");
exit(1);
case 0: /* child falls thru */
break;
default: /* parent */
if (waitpid(pid, NULL, 0) < 0) {
slurm_perror("waitpid");
exit(1);
}
exit(0);
}
/* child continues here */
/* Process 4: execs the job */
if (setuid(uid) < 0) {
slurm_perror("setuid");
exit(1);
}
execl("/bin/bash", "bash", "-c", cmdbuf, 0);
slurm_perror("execl");
exit(1);
}
/*
* Print usage message and exit.
*/
void
usage(void)
{
printf("Usage: runqsw [-p] [-u uid] [-i elanid] [-n nprocs] exec args\n");
exit(1);
}
int
main(int argc, char *argv[])
{
extern char *optarg;
qsw_jobinfo_t job, j;
int c;
int nprocs = 0;
int nodeid = -1;
uid_t uid = getuid();
bitstr_t bit_decl(nodeset, QSW_MAX_TASKS);
char cmdbuf[1024] = { 0 };
/*
* Handle arguments.
*/
while ((c = getopt(argc, argv, "pi:u:n:")) != EOF) {
switch (c) {
case 'n':
nprocs = atoi(optarg);
break;
case 'u':
uid = atoi(optarg);
break;
case 'i':
nodeid = atoi(optarg);
break;
case 'p':
pack_jobinfo = true;
break;
default:
usage();
}
}
if (optind == argc)
usage();
if (nprocs == 0)
nprocs = 2;
if (nodeid < 0) {
if ((nodeid = qsw_getnodeid()) < 0) {
slurm_perror("qsw_getnodeid");
exit(1);
}
}
while (optind < argc)
snprintf(cmdbuf + strlen(cmdbuf), sizeof(cmdbuf) - strlen(cmdbuf), "%s ", argv[optind++]);
cmdbuf[strlen(cmdbuf) - 1] = '\0';
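/* Mark this node's Elan id in the nodeset used to build the job description. */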
bit_set(nodeset, nodeid);
/*
* Set up 'job' to describe the parallel program.
* Srun would do this when running without slurmctld;
* otherwise slurmctld would do this after calling
* qsw_init to establish persistent state in the library.
*/
if (qsw_alloc_jobinfo(&job) < 0) {
slurm_perror("qsw_alloc_jobinfo");
exit(1);
}
if (qsw_setup_jobinfo(job, nprocs, nodeset, 0) < 0) {
slurm_perror("qsw_setup_jobinfo");
exit(1);
}
/*
* Pack and unpack the job if requested;
* print the jobinfo struct regardless.
*/
qsw_print_jobinfo(stderr, job);
if (pack_jobinfo) {
Buf buffer;
buffer = init_buf(8096);
qsw_pack_jobinfo(job, buffer);
qsw_alloc_jobinfo(&j);
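/* Rewind the buffer, then unpack into a second jobinfo and print it for comparison. */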
set_buf_offset(buffer, 0);
qsw_unpack_jobinfo(j, buffer);
qsw_print_jobinfo(stderr, j);
} else
j = job;
/*
* Now execute the parallel job like slurmd will.
*/
slurmd(j, uid, nodeid, nprocs, cmdbuf);
/*
* Free the 'job' information.
*/
qsw_free_jobinfo(job);
exit(0);
}