blob: 3dc05a8f3fd58e15caefd837d2b2b9f7af00fccb [file] [log] [blame]
/*****************************************************************************\
* rate_limit.c
*****************************************************************************
* Copyright (C) SchedMD LLC.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include <stdbool.h>
#include "src/common/macros.h"
#include "src/common/slurm_protocol_defs.h"
#include "src/common/xstring.h"
#include "src/interfaces/conn.h"
#include "src/slurmctld/proc_req.h"
#include "src/slurmctld/slurmctld.h"
/*
 * One token bucket per rate-limited user, stored in the user_buckets table.
 *
 * last_update is scaled by refill_period, and is not the direct unix time
 */
typedef struct {
/* time(NULL) / refill_period as of the last request charged to this bucket */
time_t last_update;
/* unix time of the last "rate limit exceeded" log message for this user */
time_t last_logged;
/* tokens left; each accepted RPC consumes one */
uint32_t tokens;
/* owner of this slot; 0 marks the slot as unused */
uid_t uid;
} user_bucket_t;
/* Number of slots in user_buckets; overridden by rl_table_size= */
static int table_size = 8192;
/*
 * Table of per-user buckets, indexed by uid % table_size and scanned forward
 * on collision. Allocated by rate_limit_init(), released by
 * rate_limit_shutdown().
 */
static user_bucket_t *user_buckets = NULL;
/* Set by rate_limit_init() when rl_enable is present; cleared at shutdown */
static bool rate_limit_enabled = false;
/*
 * Held while bucket entries are looked up and updated, and while the table
 * is freed.
 */
static pthread_mutex_t rate_limit_mutex = PTHREAD_MUTEX_INITIALIZER;
/* 30 tokens max, bucket refills 2 tokens per 1 second */
/* Maximum tokens a bucket can hold; overridden by rl_bucket_size= */
static int bucket_size = 30;
/*
 * Minimum number of seconds between "rate limit exceeded" log messages for
 * one user; -1 disables the message. Overridden by rl_log_freq=
 */
static int log_freq = 0;
/* Tokens added per elapsed refill period; overridden by rl_refill_rate= */
static int refill_rate = 2;
/* Length of one refill period in seconds; overridden by rl_refill_period= */
static int refill_period = 1;
extern void rate_limit_init(void)
{
char *tmp_ptr;
if (!xstrcasestr(slurm_conf.slurmctld_params, "rl_enable"))
return;
if ((tmp_ptr = xstrcasestr(slurm_conf.slurmctld_params,
"rl_table_size=")))
table_size = atoi(tmp_ptr + 14);
if ((tmp_ptr = xstrcasestr(slurm_conf.slurmctld_params,
"rl_bucket_size=")))
bucket_size = atoi(tmp_ptr + 15);
if ((tmp_ptr = xstrcasestr(slurm_conf.slurmctld_params,
"rl_log_freq=")))
log_freq = atoi(tmp_ptr + 12);
if ((tmp_ptr = xstrcasestr(slurm_conf.slurmctld_params,
"rl_refill_rate=")))
refill_rate = atoi(tmp_ptr + 15);
if ((tmp_ptr = xstrcasestr(slurm_conf.slurmctld_params,
"rl_refill_period=")))
refill_period = atoi(tmp_ptr + 17);
rate_limit_enabled = true;
user_buckets = xcalloc(table_size, sizeof(user_bucket_t));
info("RPC rate limiting enabled");
info("%s: rl_table_size=%d,rl_bucket_size=%d,rl_refill_rate=%d,rl_refill_period=%d",
__func__, table_size, bucket_size, refill_rate, refill_period);
}
/*
 * Turn rate limiting off and release the user bucket table.
 *
 * Both steps are done while holding rate_limit_mutex. rate_limit_exceeded()
 * reads rate_limit_enabled without the lock, so a caller may still get past
 * that check; it re-tests user_buckets for NULL once it holds the lock.
 * NOTE(review): that relies on xfree() leaving user_buckets NULL - confirm.
 */
extern void rate_limit_shutdown(void)
{
slurm_mutex_lock(&rate_limit_mutex);
rate_limit_enabled = false;
xfree(user_buckets);
slurm_mutex_unlock(&rate_limit_mutex);
}
/*
 * Charge one token to the bucket of the user who sent this RPC.
 *
 * IN msg - incoming RPC. msg_type and auth_uid decide whether the request is
 *	exempt and which bucket is charged; address / tls_conn are only
 *	consulted when a "limit exceeded" message is logged.
 * RET true if the limit's been exceeded, or if the bucket table has already
 *	been released by rate_limit_shutdown(). False otherwise, including
 *	when rate limiting is disabled, the RPC type or user is exempt, or
 *	the table has no free slot left for this user.
 */
extern bool rate_limit_exceeded(slurm_msg_t *msg)
{
	slurmctld_rpc_t *this_rpc = NULL;
	user_bucket_t *bucket = NULL;
	bool exceeded = false, log_exceeded = false;
	int start_position = 0, position = 0;
	time_t now;

	if (!rate_limit_enabled)
		return false;

	if ((this_rpc = find_rpc(msg->msg_type)) && this_rpc->rl_exempt)
		return false;

	/*
	 * Exempt SlurmUser / root. Subjecting internal cluster traffic to
	 * the rate limit would break things really quickly. :)
	 * (We're assuming SlurmdUser is root here.)
	 */
	if (validate_slurm_user(msg->auth_uid))
		return false;

	slurm_mutex_lock(&rate_limit_mutex);

	/* This can happen if we already executed rate_limit_shutdown */
	if (!user_buckets) {
		slurm_mutex_unlock(&rate_limit_mutex);
		return true;
	}

	now = time(NULL);

	/*
	 * Scan for position. Note that uid 0 indicates an unused slot,
	 * since root is never subjected to the rate limit.
	 * Naively hash the uid into the table. If that's not a match, keep
	 * scanning for the next vacant spot. Wrap around to the front if
	 * necessary once we hit the end.
	 */
	start_position = position = msg->auth_uid % table_size;
	while ((user_buckets[position].uid) &&
	       (user_buckets[position].uid != msg->auth_uid)) {
		position++;
		if (position == table_size)
			position = 0;
		if (position == start_position) {
			/* Came full circle: every slot belongs to someone else */
			position = table_size;
			break;
		}
	}

	if (position < table_size)
		bucket = &user_buckets[position];

	if (!bucket) {
		/*
		 * Avoid the temptation to resize the table... you'd need to
		 * rehash all the contents which would be annoying and slow.
		 */
		error("RPC Rate Limiting: ran out of user table space. User will not be limited.");
	} else if (!bucket->uid) {
		/* First request from this user: claim the slot, charge one token */
		bucket->uid = msg->auth_uid;
		bucket->last_update = now / refill_period;
		bucket->tokens = bucket_size - 1;
		debug3("%s: new entry for uid %u", __func__, msg->auth_uid);
	} else {
		time_t now_periods = now / refill_period;
		time_t delta = now_periods - bucket->last_update;

		bucket->last_update = now_periods;

		/*
		 * Add tokens. The sum is computed in 64 bits and clamped
		 * before it is narrowed back to the 32-bit counter, so a long
		 * idle time or a large refill rate cannot wrap the count to a
		 * small value. A non-positive delta (clock stepped backwards)
		 * adds nothing.
		 */
		if ((delta > 0) && (refill_rate > 0)) {
			uint64_t refilled = (uint64_t) bucket->tokens +
					    ((uint64_t) delta * refill_rate);
			bucket->tokens = MIN(refilled, (uint64_t) bucket_size);
		}

		if (bucket->tokens)
			bucket->tokens--;
		else
			exceeded = true;

		debug3("%s: found uid %u at position %d remaining tokens %d%s",
		       __func__, msg->auth_uid, position,
		       bucket->tokens,
		       (exceeded ? " rate limit exceeded" : ""));

		/*
		 * Decide about logging and stamp last_logged while the lock is
		 * still held: once it is dropped the table may be freed by
		 * rate_limit_shutdown() or touched by another RPC thread.
		 */
		if (exceeded && (log_freq != -1) &&
		    ((bucket->last_logged + log_freq) <= now)) {
			bucket->last_logged = now;
			log_exceeded = true;
		}
	}

	slurm_mutex_unlock(&rate_limit_mutex);

	/* Address lookup and logging only touch msg, so do them unlocked */
	if (log_exceeded) {
		slurm_addr_t *cli_addr = &msg->address;

		if (cli_addr->ss_family == AF_UNSPEC) {
			int fd = conn_g_get_fd(msg->tls_conn);
			(void) slurm_get_peer_addr(fd, cli_addr);
		}

		info("RPC rate limit exceeded by uid %u with %s from %pA, telling to back off",
		     msg->auth_uid, rpc_num2string(msg->msg_type), cli_addr);
	}

	return exceeded;
}