blob: bc6efcbfdeb3c3d093c9ed4b31ac2db7693f6c53 [file] [log] [blame]
/*****************************************************************************\
* pam_slurm_adopt.c - Adopt incoming connections into jobs
*****************************************************************************
* Copyright (C) 2015, Brigham Young University
* Author: Ryan Cox <ryan_cox@byu.edu>
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#define _GNU_SOURCE
#ifndef PAM_MODULE_NAME
# define PAM_MODULE_NAME "pam_slurm_adopt"
#endif
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <security/_pam_macros.h>
#include <security/pam_ext.h>
#define PAM_SM_ACCOUNT
#include <security/pam_modules.h>
#include <security/pam_modutil.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <syslog.h>
#include <pwd.h>
#include <stddef.h>
#include <stdint.h>
#include <arpa/inet.h>
#include "helper.h"
#include "slurm/slurm.h"
#include "src/common/slurm_xlator.h"
#include "src/common/callerid.h"
#include "src/interfaces/cgroup.h"
#include "src/common/slurm_protocol_api.h"
/*
 * Possible reactions to an incoming connection. Which of these a given
 * option accepts is decided where the module arguments are parsed.
 */
typedef enum {
	CALLERID_ACTION_NEWEST = 0,	/* adopt into the user's newest job */
	CALLERID_ACTION_ALLOW = 1,	/* let the connection through */
	CALLERID_ACTION_IGNORE = 2,	/* answer PAM_IGNORE */
	CALLERID_ACTION_DENY = 3,	/* refuse the connection */
} callerid_action_t;
/*
 * Module options. Reset to defaults by _init_opts() and then overridden from
 * the PAM argument list by _parse_opts() on every pam_sm_acct_mgmt() call.
 */
static struct {
int single_job_skip_rpc; /* Undocumented. If 1 and there is only 1 user
* job, adopt it and skip RPC. If 0, *always*
* try RPC even in single job situations.
* Unlikely to ever be set to 0. */
/* "ignore_root=": when non-zero, uid 0 is answered with PAM_IGNORE */
int ignore_root;
/* "action_no_jobs=" (deny|ignore): the user owns no job on this node */
callerid_action_t action_no_jobs;
/*
 * "action_unknown=" (newest|allow|deny): neither the single-job check nor
 * the callerid RPC identified the job
 */
callerid_action_t action_unknown;
/* "action_adopt_failure=" (allow|deny): adoption into the chosen job failed */
callerid_action_t action_adopt_failure;
/*
 * "action_generic_failure=" (allow|ignore|deny): selects the return code
 * pam_sm_acct_mgmt() starts out with, e.g. used when no steps are found
 */
callerid_action_t action_generic_failure;
/* "log_level=": syslog level handed to _log_init() */
log_level_t log_level;
/*
 * "nodename=": xstrdup()ed copy; passed to stepd_available() and used as
 * the "_<name>" suffix of the cgroup/v1 slurm directory
 */
char *node_name;
/* "disable_x11=": when true, DISPLAY/XAUTHORITY are not exported */
bool disable_x11;
/*
 * "service=": xstrdup()ed PAM service name the module runs for; NULL means
 * "sshd" and "*" means any service (see check_pam_service())
 */
char *pam_service;
/* "join_container=": when true, setns() into the namespace the step returns */
bool join_container;
} opts;
/* Put every module option back to its built-in default value. */
static void _init_opts(void)
{
	/* what to do in each situation */
	opts.action_unknown = CALLERID_ACTION_NEWEST;
	opts.action_no_jobs = CALLERID_ACTION_DENY;
	opts.action_generic_failure = CALLERID_ACTION_IGNORE;
	opts.action_adopt_failure = CALLERID_ACTION_ALLOW;

	/* on/off switches */
	opts.ignore_root = 1;
	opts.single_job_skip_rpc = 1;
	opts.join_container = true;
	opts.disable_x11 = false;

	/* logging */
	opts.log_level = LOG_LEVEL_INFO;

	/* strings allocated while parsing arguments; none by default */
	opts.pam_service = NULL;
	opts.node_name = NULL;
}
/*
 * Ask the slurmstepd of a step to adopt a process and export the job's
 * environment into the PAM session.
 *
 * After a successful adoption SLURM_JOB_ID is exported and, unless
 * opts.disable_x11 is set, DISPLAY and XAUTHORITY are exported when the step
 * reports them. When opts.join_container is set, the calling process also
 * enters the namespace whose file descriptor the step hands back (if any).
 *
 * IN pamh - PAM handle whose environment is updated
 * IN pid - process to adopt
 * IN stepd - step to adopt the process into
 * RET SLURM_SUCCESS if the process was adopted (and any namespace joined),
 *     -1 if stepd is NULL or the step cannot be reached, otherwise the
 *     adoption error or SLURM_ERROR.
 *
 * NOTE: opts.action_adopt_failure is not consulted here; callers decide what
 * a failure means.
 */
static int _adopt_process(pam_handle_t *pamh, pid_t pid, step_loc_t *stepd)
{
	int fd;
	uint16_t protocol_version;
	int rc;

	if (!stepd)
		return -1;

	debug("%s: trying to get %ps to adopt %d",
	      __func__, &stepd->step_id, pid);
	fd = stepd_connect(stepd->directory, stepd->nodename,
			   &stepd->step_id, &protocol_version);
	if (fd < 0) {
		/* It's normal for a step to exit */
		debug3("unable to connect to %ps on %s: %m",
		       &stepd->step_id, stepd->nodename);
		return -1;
	}

	/*
	 * Use the version obtained on this very connection for every request.
	 * stepd->protocol_version may never have been filled in, e.g. when the
	 * caller built a zeroed step_loc_t by hand.
	 */
	rc = stepd_add_extern_pid(fd, protocol_version, pid);

	if (rc == SLURM_SUCCESS) {
		char *env;

		env = xstrdup_printf("SLURM_JOB_ID=%u", stepd->step_id.job_id);
		pam_putenv(pamh, env);
		xfree(env);
	}

	if ((rc == SLURM_SUCCESS) && !opts.disable_x11) {
		int display;
		char *xauthority = NULL;

		display = stepd_get_x11_display(fd, protocol_version,
						&xauthority);
		if (display) {
			char *env;

			env = xstrdup_printf("DISPLAY=localhost:%d.0", display);
			pam_putenv(pamh, env);
			xfree(env);
		}
		if (xauthority) {
			char *env;

			env = xstrdup_printf("XAUTHORITY=%s", xauthority);
			pam_putenv(pamh, env);
			xfree(env);
			xfree(xauthority);
		}
	}

	if (opts.join_container) {
		int ns_fd = stepd_get_namespace_fd(fd, protocol_version);

		if (ns_fd == -1) {
			error("stepd_get_ns_fd failed");
			rc = SLURM_ERROR;
		} else if (ns_fd == 0) {
			debug2("No ns_fd given back, expected if not running with a job_container plugin that supports namespace mounting");
		} else {
			/*
			 * No need to specify the type of namespace, rely on
			 * slurm to give us the right one
			 */
			if (setns(ns_fd, 0)) {
				error("setns() failed: %m");
				rc = SLURM_ERROR;
			}
			/*
			 * The descriptor is of no use once setns() has run;
			 * don't leak it into the user's session.
			 */
			close(ns_fd);
		}
	}

	close(fd);

	if (rc == SLURM_SUCCESS)
		info("Process %d adopted into job %u",
		     pid, stepd->step_id.job_id);
	else
		info("Process %d adoption FAILED for job %u",
		     pid, stepd->step_id.job_id);

	return rc;
}
/*
 * Ask a step's slurmstepd which uid owns it. stepd_connect() is handed
 * &stepd->protocol_version, and that value is then used for the query.
 *
 * RET the owning uid, or (uid_t) -1 if the step cannot be reached or its uid
 *     cannot be determined. That typically just means the step has exited
 *     and is not a problem.
 */
static uid_t _get_job_uid(step_loc_t *stepd)
{
	uid_t owner;
	int conn = stepd_connect(stepd->directory, stepd->nodename,
				 &stepd->step_id, &stepd->protocol_version);

	if (conn < 0) {
		/* It's normal for a step to exit */
		debug3("unable to connect to %ps on %s: %m",
		       &stepd->step_id, stepd->nodename);
		return -1;
	}

	owner = stepd_get_uid(conn, stepd->protocol_version);
	close(conn);

	/* The step may have exited in the meantime. Not a big concern. */
	if ((int32_t) owner == -1) {
		debug3("unable to determine uid of %ps on %s",
		       &stepd->step_id, stepd->nodename);
	}

	return owner;
}
/*
 * Report the modification time (st_mtime) of the "job_<job_id>" directory
 * below the given uid cgroup directory.
 *
 * RET the mtime, or 0 if the path is too long or cannot be stat()ed. A zero
 *     makes the caller's choice of job (somewhat) random.
 */
static time_t _cgroup_creation_time(char *uidcg, uint32_t job_id)
{
	struct stat job_cg_stat;
	char job_cg[PATH_MAX];
	int needed;

	needed = snprintf(job_cg, PATH_MAX, "%s/job_%u", uidcg, job_id);
	if (needed >= PATH_MAX) {
		info("snprintf: '%s/job_%u' longer than PATH_MAX of %d",
		     uidcg, job_id, PATH_MAX);
		return 0;
	}

	if (stat(job_cg, &job_cg_stat)) {
		info("Couldn't stat path '%s': %m", job_cg);
		return 0;
	}

	return job_cg_stat.st_mtime;
}
/*
 * Work out which cgroup plugin version is in use, taking the configured
 * cgroup_plugin and falling back to autodetection when it is unset or set to
 * "autodetect".
 *
 * RET 1 for "cgroup/v1", 2 for "cgroup/v2", 0 if autodetection fails or the
 *     plugin name is anything else.
 */
static int _check_cg_version()
{
	char *plugin = slurm_cgroup_conf.cgroup_plugin;

	/* An unset plugin is treated the same as "autodetect" */
	if (!plugin || !xstrcmp(plugin, "autodetect")) {
		plugin = slurm_autodetect_cgroup_version();
		if (!plugin)
			return 0;
	}

	if (!xstrcmp("cgroup/v1", plugin))
		return 1;
	if (!xstrcmp("cgroup/v2", plugin))
		return 2;

	return 0;
}
/*
 * cgroup/v2 flavour of the "pick a job for this user" fallback.
 * Unlike the cgroup/v1 path, which compares cgroup directory times, this
 * simply selects the user's extern step with the highest JobID.
 *
 * IN pamh - PAM handle, used to message the user on denial
 * IN steps - steps present on this node
 * IN uid - user whose jobs are considered
 * OUT out_stepd - set to the selected step; left untouched if none matches
 * RET PAM_SUCCESS if a step was selected. If none was, PAM_PERM_DENIED when
 *     action_no_jobs is deny, else PAM_SUCCESS (with *out_stepd untouched).
 */
static int _indeterminate_multiple_v2(pam_handle_t *pamh, list_t *steps,
				      uid_t uid, step_loc_t **out_stepd)
{
	bool found = false;
	uint32_t highest_job_id = 0;
	step_loc_t *cur = NULL;
	list_itr_t *step_itr = list_iterator_create(steps);

	while ((cur = list_next(step_itr))) {
		/* Only extern container steps owned by this user count */
		if (cur->step_id.step_id != SLURM_EXTERN_CONT)
			continue;
		if (uid != _get_job_uid(cur))
			continue;
		if (cur->step_id.job_id <= highest_job_id)
			continue;

		highest_job_id = cur->step_id.job_id;
		*out_stepd = cur;
		found = true;
	}
	list_iterator_destroy(step_itr);

	if (found)
		return PAM_SUCCESS;

	if (opts.action_no_jobs == CALLERID_ACTION_DENY) {
		debug("uid %u owns no jobs => deny", uid);
		send_user_msg(pamh, "Access denied by " PAM_MODULE_NAME
			      ": you have no active jobs on this node");
		return PAM_PERM_DENIED;
	}

	debug("uid %u owns no jobs but action_no_jobs=allow",
	      uid);
	return PAM_SUCCESS;
}
/*
 * Fallback used when the job behind a connection could not be identified:
 * pick one of the user's jobs on this node.
 *
 * With cgroup/v2 the work is delegated to _indeterminate_multiple_v2(). With
 * cgroup/v1 the user's extern step whose job cgroup directory has the latest
 * mtime is selected.
 *
 * IN pamh - PAM handle, used to message the user on denial
 * IN steps - steps present on this node
 * IN uid - user whose jobs are considered
 * OUT out_stepd - set to the selected step; left untouched if none matches
 * RET PAM_PERM_DENIED if action_unknown is deny, PAM_SESSION_ERR if the
 *     cgroup version is neither 1 nor 2, PAM_SUCCESS if a step was selected.
 *     If no step matched: PAM_PERM_DENIED when action_no_jobs is deny, else
 *     PAM_SUCCESS (with *out_stepd untouched).
 */
static int _indeterminate_multiple(pam_handle_t *pamh, list_t *steps, uid_t uid,
				   step_loc_t **out_stepd)
{
	char uid_cg_path[PATH_MAX];
	const char *controller = "freezer"; /* last resort, from proctrack/cgroup */
	char *suffix = NULL;
	time_t newest = 0;
	bool found = false;
	step_loc_t *cur = NULL;
	list_itr_t *step_itr = NULL;
	int version;

	if (opts.action_unknown == CALLERID_ACTION_DENY) {
		debug("Denying due to action_unknown=deny");
		send_user_msg(pamh,
			      "Access denied by "
			      PAM_MODULE_NAME
			      ": unable to determine source job");
		return PAM_PERM_DENIED;
	}

	version = _check_cg_version();
	debug("Detected cgroup version %d", version);

	switch (version) {
	case 1:
		break;
	case 2:
		return _indeterminate_multiple_v2(pamh, steps, uid, out_stepd);
	default:
		return PAM_SESSION_ERR;
	}

	if (opts.node_name)
		suffix = xstrdup_printf("_%s", opts.node_name);

	/* pick a cgroup controller whose hierarchy is likely to exist */
	if (slurm_cgroup_conf.constrain_ram_space ||
	    slurm_cgroup_conf.constrain_swap_space)
		controller = "memory";
	else if (slurm_cgroup_conf.constrain_cores)
		controller = "cpuset";
	else if (slurm_cgroup_conf.constrain_devices)
		controller = "devices";

	if (snprintf(uid_cg_path, PATH_MAX, "%s/%s/slurm%s/uid_%u",
		     slurm_cgroup_conf.cgroup_mountpoint, controller,
		     suffix ? suffix : "", uid) >= PATH_MAX) {
		info("snprintf: '%s/%s/slurm%s/uid_%u' longer than PATH_MAX of %d",
		     slurm_cgroup_conf.cgroup_mountpoint, controller,
		     suffix ? suffix : "", uid, PATH_MAX);
		/*
		 * Fall back to an empty base path. Every stat below will then
		 * fail, so the choice of job becomes (somewhat) random
		 * instead of "the latest".
		 */
		uid_cg_path[0] = '\0';
	}
	xfree(suffix);

	step_itr = list_iterator_create(steps);
	while ((cur = list_next(step_itr))) {
		time_t cg_time;

		/* Only extern container steps owned by this user count */
		if (cur->step_id.step_id != SLURM_EXTERN_CONT)
			continue;
		if (uid != _get_job_uid(cur))
			continue;

		/* Keep the job whose cgroup has the latest time stamp */
		cg_time = _cgroup_creation_time(uid_cg_path,
						cur->step_id.job_id);
		if (cg_time < newest)
			continue;

		newest = cg_time;
		*out_stepd = cur;
		found = true;
	}
	list_iterator_destroy(step_itr);

	if (found)
		return PAM_SUCCESS;

	/*
	 * The user has no job on this node. That should have been caught
	 * earlier, but wasn't for some reason.
	 */
	if (opts.action_no_jobs == CALLERID_ACTION_DENY) {
		debug("uid %u owns no jobs => deny", uid);
		send_user_msg(pamh, "Access denied by " PAM_MODULE_NAME
			      ": you have no active jobs on this node");
		return PAM_PERM_DENIED;
	}

	debug("uid %u owns no jobs but action_no_jobs=allow",
	      uid);
	return PAM_SUCCESS;
}
/*
 * The action of last resort. If action_unknown=allow, let the connection
 * through without adoption. Otherwise call _indeterminate_multiple() to pick
 * a job and adopt this process into it.
 *
 * IN pamh - PAM handle
 * IN pwd - passwd entry of the connecting user
 * IN steps - steps present on this node
 * RET PAM_SUCCESS if the process was adopted, or if adoption failed but
 *     action_adopt_failure=allow; PAM_PERM_DENIED if adoption failed
 *     otherwise; else whatever _indeterminate_multiple() returned.
 */
static int _action_unknown(pam_handle_t *pamh, struct passwd *pwd, list_t *steps)
{
	int rc;
	step_loc_t *stepd = NULL;

	if (opts.action_unknown == CALLERID_ACTION_ALLOW) {
		debug("Allowing due to action_unknown=allow");
		return PAM_SUCCESS;
	}

	/* Both the single job check and the RPC call have failed to ascertain
	 * the correct job to adopt this into. Time for drastic measures */
	rc = _indeterminate_multiple(pamh, steps, pwd->pw_uid, &stepd);
	if (rc != PAM_SUCCESS) {
		/* This pam module was worthless, apparently */
		debug("_indeterminate_multiple failed to find a job to adopt this into");
		return rc;
	}

	/*
	 * _indeterminate_multiple() can report success without selecting a
	 * step: the user's jobs are gone and action_no_jobs does not deny.
	 * There is nothing to adopt into then, and stepd must not be
	 * dereferenced.
	 */
	if (!stepd) {
		debug("action_unknown: no job selected, skipping adoption");
		return rc;
	}

	info("action_unknown: Picked job %u", stepd->step_id.job_id);
	if (_adopt_process(pamh, getpid(), stepd) == SLURM_SUCCESS)
		return PAM_SUCCESS;

	if (opts.action_adopt_failure == CALLERID_ACTION_ALLOW)
		return PAM_SUCCESS;

	return PAM_PERM_DENIED;
}
/*
 * Count the extern container steps on this node that belong to a user.
 *
 * IN steps - steps present on this node
 * IN uid - user to count jobs for
 * OUT out_stepd - the last matching step found, or NULL if there is none
 * RET number of matching steps
 */
static int _user_job_count(list_t *steps, uid_t uid, step_loc_t **out_stepd)
{
	int matches = 0;
	step_loc_t *cur = NULL;
	list_itr_t *step_itr = list_iterator_create(steps);

	*out_stepd = NULL;

	while ((cur = list_next(step_itr))) {
		/* Skip anything that isn't an extern container step */
		if (cur->step_id.step_id != SLURM_EXTERN_CONT)
			continue;
		/* Skip steps of other users */
		if (uid != _get_job_uid(cur))
			continue;

		*out_stepd = cur;
		matches++;
	}
	list_iterator_destroy(step_itr);

	return matches;
}
/*
 * Send the connection's endpoints to slurm_network_callerid() to find out
 * which job the connection originated from.
 *
 * IN conn - source/destination address, ports and address family
 * IN user_name - connecting user, only used in log messages
 * OUT job_id - job the connection belongs to
 * RET SLURM_SUCCESS if a job was identified, SLURM_ERROR if the RPC failed or
 *     the answer was NO_VAL (job indeterminate).
 */
static int _rpc_network_callerid(callerid_conn_t *conn, char *user_name,
				 uint32_t *job_id)
{
	network_callerid_msg_t req;
	char ip_src_str[INET6_ADDRSTRLEN];
	char node_name[HOST_NAME_MAX];

	memset(&req, 0, sizeof(req));
	memcpy((void *)&req.ip_src, (void *)&conn->ip_src, 16);
	memcpy((void *)&req.ip_dst, (void *)&conn->ip_dst, 16);
	req.port_src = conn->port_src;
	req.port_dst = conn->port_dst;
	req.af = conn->af;

	/*
	 * The printable address is only used for logging. Make sure the
	 * buffer holds a valid string even if the conversion fails.
	 */
	if (!inet_ntop(req.af, &conn->ip_src, ip_src_str, sizeof(ip_src_str)))
		snprintf(ip_src_str, sizeof(ip_src_str), "(unknown)");

	if (slurm_network_callerid(req, job_id, node_name, sizeof(node_name))
	    != SLURM_SUCCESS) {
		debug("From %s port %d as %s: unable to retrieve callerid data from remote slurmd",
		      ip_src_str, req.port_src, user_name);
		return SLURM_ERROR;
	}

	if (*job_id == NO_VAL) {
		debug("From %s port %d as %s: job indeterminate",
		      ip_src_str, req.port_src, user_name);
		return SLURM_ERROR;
	}

	info("From %s port %d as %s: member of job %u",
	     ip_src_str, req.port_src, user_name, *job_id);
	return SLURM_SUCCESS;
}
/*
 * Find out which job opened this connection by querying the slurmd at the
 * connection's source address, and adopt this process into that job's extern
 * step.
 *
 * IN pamh - PAM handle
 * IN pwd - passwd entry of the connecting user
 * RET PAM_SUCCESS if the process was adopted; PAM_IGNORE on any failure so
 *     that the caller falls through to its next action.
 */
static int _try_rpc(pam_handle_t *pamh, struct passwd *pwd)
{
	callerid_conn_t conn;
	char src_addr[INET6_ADDRSTRLEN];
	step_loc_t target;
	uint32_t job_id;

	debug("Checking file descriptors for network socket");

	/*
	 * Look through our own fds for a network socket. If none is found the
	 * RPC is bound to fail, so don't even try. This is odd and likely
	 * means that the kernel doesn't let us see this process' network
	 * info, or that sshd arranged its file descriptors differently.
	 */
	if (callerid_get_own_netinfo(&conn) != SLURM_SUCCESS) {
		error("callerid_get_own_netinfo unable to find network socket");
		return PAM_IGNORE;
	}

	/* A source address that can't be printed means we grabbed bad data */
	if (!inet_ntop(conn.af, &conn.ip_src, src_addr, sizeof(src_addr))) {
		error("inet_ntop failed");
		return PAM_IGNORE;
	}

	/* Ask the slurmd at the source IP address about this connection */
	if (_rpc_network_callerid(&conn, pwd->pw_name, &job_id) !=
	    SLURM_SUCCESS) {
		info("From %s port %d as %s: unable to determine source job",
		     src_addr, conn.port_src, pwd->pw_name);
		return PAM_IGNORE;
	}

	/* Only the step id has to be filled in for the adoption */
	memset(&target, 0, sizeof(target));
	target.step_id.job_id = job_id;
	target.step_id.step_id = SLURM_EXTERN_CONT;
	target.step_id.step_het_comp = NO_VAL;

	/*
	 * A failed adoption may just mean that the user hopped through a node
	 * where they were adopted into a job that doesn't run here, i.e. we
	 * were given a job id that is useless on this node. Fall through to
	 * the next action in that case.
	 */
	if (_adopt_process(pamh, getpid(), &target) != SLURM_SUCCESS)
		return PAM_IGNORE;

	return PAM_SUCCESS;
}
/*
 * Translate the value of the "log_level=" option into a log level. The value
 * may be an integer (any base strtoul() understands with base 0) or one of
 * the level names, compared case-insensitively.
 *
 * Problems are reported with pam_syslog() since normal logging is not yet
 * initialized at this point.
 *
 * IN pamh - PAM handle, used for pam_syslog()
 * IN log_level_str - text following "log_level="
 * RET the parsed level. Integers that are too high, unknown names and empty
 *     values all yield the highest level (LOG_LEVEL_END - 1).
 */
log_level_t _parse_log_level(pam_handle_t *pamh, const char *log_level_str)
{
	static const struct {
		const char *name;
		log_level_t level;
	} level_names[] = {
		{ "quiet", LOG_LEVEL_QUIET },
		{ "fatal", LOG_LEVEL_FATAL },
		{ "error", LOG_LEVEL_ERROR },
		{ "info", LOG_LEVEL_INFO },
		{ "verbose", LOG_LEVEL_VERBOSE },
		{ "debug", LOG_LEVEL_DEBUG },
		{ "debug2", LOG_LEVEL_DEBUG2 },
		{ "debug3", LOG_LEVEL_DEBUG3 },
		{ "debug4", LOG_LEVEL_DEBUG4 },
		{ "debug5", LOG_LEVEL_DEBUG5 },
	};
	unsigned int u;
	char *endptr = NULL;
	size_t i;

	u = (unsigned int) strtoul(log_level_str, &endptr, 0);

	/*
	 * It is an integer only if at least one character was converted and
	 * nothing is left over. Without the first check an empty value would
	 * silently be read as 0, i.e. "quiet".
	 */
	if ((endptr != log_level_str) && !endptr[0]) {
		if (u >= LOG_LEVEL_END) {
			pam_syslog(pamh, LOG_ERR,
				   "log level %u too high, lowering to max", u);
			u = (unsigned int) LOG_LEVEL_END - 1;
		}
		return u;
	}

	/* not an integer, try the level names */
	for (i = 0; i < sizeof(level_names) / sizeof(level_names[0]); i++) {
		if (!strcasecmp(log_level_str, level_names[i].name))
			return level_names[i].level;
	}

	pam_syslog(pamh, LOG_ERR,
		   "unrecognized log level %s, setting to max",
		   log_level_str);
	/* We'll set it to the highest logging level, just to be sure */
	u = (unsigned int) LOG_LEVEL_END - 1;
	return u;
}
/*
 * If arg begins (case-insensitively) with key, return a pointer to the text
 * that follows the key, otherwise NULL.
 */
static const char *_opt_value(const char *arg, const char *key)
{
	size_t key_len = strlen(key);

	if (xstrncasecmp(arg, key, key_len))
		return NULL;
	return arg + key_len;
}

/*
 * Parse the module arguments from the PAM configuration into opts.
 * _init_opts() must have run first: a value that is not recognized leaves
 * the option as it was. Values are matched case-insensitively by prefix.
 *
 * Problems are reported with pam_syslog() since normal logging is not yet
 * initialized, hence the need for pamh.
 *
 * IN pamh - PAM handle, used for pam_syslog()
 * IN argc - number of arguments
 * IN argv - arguments, each of the form "name=value"
 */
static void _parse_opts(pam_handle_t *pamh, int argc, const char **argv)
{
	const char *v;

	for (; argc-- > 0; ++argv) {
		if ((v = _opt_value(*argv, "single_job_skip_rpc="))) {
			if (!xstrncasecmp(v, "1", 1))
				opts.single_job_skip_rpc = true;
			else if (!xstrncasecmp(v, "0", 1))
				opts.single_job_skip_rpc = false;
			else
				pam_syslog(
					pamh, LOG_ERR,
					"unrecognized single_job_skip_rpc=%s, setting to '1'",
					v);
		} else if ((v = _opt_value(*argv, "ignore_root="))) {
			if (!xstrncasecmp(v, "1", 1))
				opts.ignore_root = true;
			else if (!xstrncasecmp(v, "0", 1))
				opts.ignore_root = false;
			else
				pam_syslog(
					pamh, LOG_ERR,
					"unrecognized ignore_root=%s, setting to '1'",
					v);
		} else if ((v = _opt_value(*argv, "action_no_jobs="))) {
			if (!xstrncasecmp(v, "deny", 4))
				opts.action_no_jobs = CALLERID_ACTION_DENY;
			else if (!xstrncasecmp(v, "ignore", 6))
				opts.action_no_jobs = CALLERID_ACTION_IGNORE;
			else {
				pam_syslog(pamh, LOG_ERR,
					   "unrecognized action_no_jobs=%s, setting to 'deny'",
					   v);
			}
		} else if ((v = _opt_value(*argv, "action_unknown="))) {
			if (!xstrncasecmp(v, "allow", 5))
				opts.action_unknown = CALLERID_ACTION_ALLOW;
			else if (!xstrncasecmp(v, "newest", 6))
				opts.action_unknown = CALLERID_ACTION_NEWEST;
			else if (!xstrncasecmp(v, "deny", 4))
				opts.action_unknown = CALLERID_ACTION_DENY;
			else {
				pam_syslog(pamh, LOG_ERR,
					   "unrecognized action_unknown=%s, setting to 'newest'",
					   v);
			}
		} else if ((v = _opt_value(*argv, "action_generic_failure="))) {
			if (!xstrncasecmp(v, "allow", 5))
				opts.action_generic_failure =
					CALLERID_ACTION_ALLOW;
			else if (!xstrncasecmp(v, "ignore", 6))
				opts.action_generic_failure =
					CALLERID_ACTION_IGNORE;
			else if (!xstrncasecmp(v, "deny", 4))
				opts.action_generic_failure =
					CALLERID_ACTION_DENY;
			else {
				/* the built-in default is ignore, not allow */
				pam_syslog(pamh, LOG_ERR,
					   "unrecognized action_generic_failure=%s, setting to 'ignore'",
					   v);
			}
		} else if ((v = _opt_value(*argv, "action_adopt_failure="))) {
			if (!xstrncasecmp(v, "allow", 5))
				opts.action_adopt_failure =
					CALLERID_ACTION_ALLOW;
			else if (!xstrncasecmp(v, "deny", 4))
				opts.action_adopt_failure =
					CALLERID_ACTION_DENY;
			else {
				pam_syslog(pamh, LOG_ERR,
					   "unrecognized action_adopt_failure=%s, setting to 'allow'",
					   v);
			}
		} else if ((v = _opt_value(*argv, "log_level="))) {
			opts.log_level = _parse_log_level(pamh, v);
		} else if ((v = _opt_value(*argv, "nodename="))) {
			/* drop an earlier value if the option is repeated */
			xfree(opts.node_name);
			opts.node_name = xstrdup(v);
		} else if ((v = _opt_value(*argv, "disable_x11="))) {
			if (!xstrncasecmp(v, "1", 1))
				opts.disable_x11 = true;
			else if (!xstrncasecmp(v, "0", 1))
				opts.disable_x11 = false;
			else
				pam_syslog(
					pamh, LOG_ERR,
					"unrecognized disable_x11=%s, setting to '0'",
					v);
		} else if ((v = _opt_value(*argv, "service="))) {
			/* drop an earlier value if the option is repeated */
			xfree(opts.pam_service);
			opts.pam_service = xstrdup(v);
		} else if ((v = _opt_value(*argv, "join_container="))) {
			if (!xstrncasecmp(v, "true", 4))
				opts.join_container = true;
			else if (!xstrncasecmp(v, "false", 5))
				opts.join_container = false;
			else
				pam_syslog(
					pamh, LOG_ERR,
					"unrecognized join_container=%s, setting to 'true'",
					v);
		} else {
			pam_syslog(pamh, LOG_ERR,
				   "ignoring unrecognized option '%s'", *argv);
		}
	}
}
/*
 * Initialize Slurm logging for this module: syslog (LOG_AUTHPRIV facility)
 * at the requested level, stderr at fatal level.
 */
static void _log_init(log_level_t level)
{
	log_options_t log_cfg = LOG_OPTS_INITIALIZER;

	log_cfg.syslog_level = level;
	log_cfg.stderr_level = LOG_LEVEL_FATAL;

	log_init(PAM_MODULE_NAME, log_cfg, LOG_AUTHPRIV, NULL);
}
/*
 * Only carry on when running for the allowed PAM service ("sshd" unless the
 * service= option says otherwise; "*" allows every service).
 *
 * If this module is used locally, e.g. via sudo, unexpected things might
 * happen (such as passing environment variables interpreted by slurm code
 * like SLURM_CONF, or inheriting file descriptors that _try_rpc() looks at).
 *
 * RET PAM_SUCCESS if the service is allowed, PAM_IGNORE if it is not,
 *     PAM_BAD_ITEM if PAM hands back no service name, or the pam_get_item()
 *     error.
 */
static int check_pam_service(pam_handle_t *pamh)
{
	const char *allowed = "sshd";
	char *service = NULL;
	int rc;

	if (opts.pam_service)
		allowed = opts.pam_service;

	/* any service name is allowed */
	if (!xstrcmp(allowed, "*"))
		return PAM_SUCCESS;

	rc = pam_get_item(pamh, PAM_SERVICE, (void*)&service);
	if (rc != PAM_SUCCESS) {
		pam_syslog(pamh, LOG_ERR, "failed to obtain PAM_SERVICE name");
		return rc;
	}

	/* this shouldn't actually happen */
	if (!service)
		return PAM_BAD_ITEM;

	if (xstrcmp(service, allowed)) {
		pam_syslog(pamh, LOG_INFO,
			   "Not adopting process since this is not an allowed pam service");
		return PAM_IGNORE;
	}

	return PAM_SUCCESS;
}
/*
 * PAM account management entry point.
 *
 * Parse arguments, etc then get my socket address/port information. Attempt
 * to adopt this process into a job in the following order:
 *	1) If the user has only one job on the node, pick that one
 *	2) Send RPC to source IP of socket. If there is a slurmd at the IP
 *	   address, ask it which job I belong to. On success, pick that one
 *	3) Pick a job semi-randomly (default) or skip the adoption (if
 *	   configured)
 *
 * IN pamh - PAM handle
 * IN flags - unused
 * IN argc/argv - module arguments, see _parse_opts()
 * RET a PAM return code. Failures not tied to a more specific option use the
 *     code selected by action_generic_failure.
 *
 * Strings allocated while parsing the arguments are released on every exit
 * path.
 */
PAM_EXTERN int pam_sm_acct_mgmt(pam_handle_t *pamh, int flags
				__attribute__((unused)), int argc, const char **argv)
{
	int retval = PAM_IGNORE, rc = PAM_IGNORE, slurmrc, bufsize, user_jobs;
	char *user_name = NULL;
	list_t *steps = NULL;
	step_loc_t *stepd = NULL;
	struct passwd pwd, *pwd_result = NULL;
	char *buf = NULL;

	_init_opts();
	_parse_opts(pamh, argc, argv);

	retval = check_pam_service(pamh);
	if (retval != PAM_SUCCESS) {
		rc = retval;
		goto free_opts;
	}

	_log_init(opts.log_level);

	switch (opts.action_generic_failure) {
	case CALLERID_ACTION_DENY:
		rc = PAM_PERM_DENIED;
		break;
	case CALLERID_ACTION_ALLOW:
		rc = PAM_SUCCESS;
		break;
	case CALLERID_ACTION_IGNORE:
		rc = PAM_IGNORE;
		break;
	/* Newer gcc versions warn if enum cases are missing */
	default:
		error("The code is broken!!!!");
	}

	retval = pam_get_item(pamh, PAM_USER, (void *) &user_name);
	if (user_name == NULL || retval != PAM_SUCCESS) {
		pam_syslog(pamh, LOG_ERR, "No username in PAM_USER? Fail!");
		rc = PAM_SESSION_ERR;
		goto free_opts;
	}

	/* Check for an unsafe config that might lock out root. This is a very
	 * basic check that shouldn't be 100% relied on */
	if (!opts.ignore_root &&
	    (opts.action_unknown == CALLERID_ACTION_DENY ||
	     opts.action_no_jobs != CALLERID_ACTION_ALLOW ||
	     opts.action_adopt_failure != CALLERID_ACTION_ALLOW ||
	     opts.action_generic_failure != CALLERID_ACTION_ALLOW
		    )) {
		/* Let's get verbose */
		info("===============================");
		info("Danger!!!");
		info("A crazy admin set ignore_root=0 and some unsafe actions");
		info("You might lock out root!");
		info("If this is desirable, modify the source code");
		info("Setting ignore_root=1 and continuing");
		opts.ignore_root = 1;
	}

	/* Calculate buffer size for getpwnam_r */
	bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
	if (bufsize == -1)
		bufsize = 16384; /* take a large guess */

	buf = xmalloc(bufsize);
	retval = getpwnam_r(user_name, &pwd, buf, bufsize, &pwd_result);
	if (pwd_result == NULL) {
		if (retval == 0) {
			error("getpwnam_r could not locate %s", user_name);
		} else {
			errno = retval;
			error("getpwnam_r: %m");
		}
		rc = PAM_SESSION_ERR;
		goto free_opts;
	}

	/* Ignoring root is probably best but the admin can allow it */
	if (pwd.pw_uid == 0) {
		if (opts.ignore_root) {
			info("Ignoring root user");
			rc = PAM_IGNORE;
			goto free_opts;
		} else {
			/* This administrator is crazy */
			info("Danger!!! This is a connection attempt by root (user id 0) and ignore_root=0 is set! Hope for the best!");
		}
	}

	/*
	 * Initialize Slurm after root has been granted access but before any
	 * Slurm API calls are made. It is critical this happens after root is
	 * handled to prevent locking an admin out of a node if there is a
	 * problem initializing Slurm.
	 */
	slurm_init(NULL);
	slurm_cgroup_conf_init();

	/*
	 * Check if there are any steps on the node from any user. A failure here
	 * likely means failures everywhere so exit on failure or if no local jobs
	 * exist. This can also happen if SlurmdSpoolDir cannot be found, or if
	 * the NodeName cannot be established for some reason.
	 */
	steps = stepd_available(NULL, opts.node_name);
	if (!steps) {
		send_user_msg(pamh, "No Slurm jobs found on node.");
		goto cleanup;
	}

	/* Check to see if this user has only one job on the node. If so, choose
	 * that job and adopt this process into it (unless configured not to) */
	user_jobs = _user_job_count(steps, pwd.pw_uid, &stepd);
	if (user_jobs == 0) {
		if (opts.action_no_jobs == CALLERID_ACTION_DENY) {
			debug("uid %u owns no jobs => deny", pwd.pw_uid);
			send_user_msg(pamh, "Access denied by " PAM_MODULE_NAME
				      ": you have no active jobs on this node");
			rc = PAM_PERM_DENIED;
		} else {
			debug("uid %u owns no jobs but action_no_jobs=ignore",
			      pwd.pw_uid);
			rc = PAM_IGNORE;
		}
		goto cleanup;
	} else if (user_jobs == 1) {
		if (opts.single_job_skip_rpc) {
			info("Connection by user %s: user has only one job %u",
			     user_name, stepd->step_id.job_id);
			slurmrc = _adopt_process(pamh, getpid(), stepd);
			/* If adoption into the only job fails, it is time to
			 * exit. Return code is based on the
			 * action_adopt_failure setting */
			if (slurmrc == SLURM_SUCCESS ||
			    (opts.action_adopt_failure ==
			     CALLERID_ACTION_ALLOW))
				rc = PAM_SUCCESS;
			else {
				send_user_msg(pamh, "Access denied by "
					      PAM_MODULE_NAME
					      ": failed to adopt process into cgroup, denying access because action_adopt_failure=deny");
				rc = PAM_PERM_DENIED;
			}
			goto cleanup;
		}
	} else {
		debug("uid %u has %d jobs", pwd.pw_uid, user_jobs);
	}

	/* Single job check turned up nothing (or we skipped it). Make RPC call
	 * to slurmd at source IP. If it can tell us the job, the function calls
	 * _adopt_process */
	rc = _try_rpc(pamh, &pwd);
	if (rc == PAM_SUCCESS)
		goto cleanup;

	/* The source of the connection either didn't reply or couldn't
	 * determine the job ID at the source. Proceed to action_unknown */
	rc = _action_unknown(pamh, &pwd, steps);

cleanup:
	/* Only reached once the cgroup configuration has been initialized */
	slurm_cgroup_conf_destroy();
	FREE_NULL_LIST(steps);
free_opts:
	/* Common tail for every exit, including the ones before slurm_init() */
	xfree(buf);
	xfree(opts.node_name);
	xfree(opts.pam_service);
	return rc;
}
#ifdef PAM_STATIC
/*
 * Module description, only compiled when PAM_STATIC is defined. The single
 * hook provided is pam_sm_acct_mgmt; every other slot is NULL.
 * NOTE(review): the initializer is positional, so the slot order is assumed
 * to match struct pam_module in the PAM headers - confirm when touching this.
 */
struct pam_module _pam_slurm_adopt_modstruct = {
PAM_MODULE_NAME,
NULL,
NULL,
pam_sm_acct_mgmt,
NULL,
NULL,
NULL,
};
#endif