blob: ae3748fea5555d7bb9ef099d1c55569a5e1cd852 [file] [log] [blame]
/*****************************************************************************\
* slurm_acct_gather.c - generic interface needed for some
* acct_gather plugins.
*****************************************************************************
* Copyright (C) SchedMD LLC.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <sys/stat.h>

#include "acct_gather.h"
#include "acct_gather_energy.h"
#include "acct_gather_interconnect.h"
#include "acct_gather_filesystem.h"
#include "src/common/pack.h"
#include "src/common/parse_config.h"
#include "src/common/xstring.h"
/*
 * Suspend flag set/cleared by acct_gather_suspend_poll() and
 * acct_gather_resume_poll() and reported by acct_gather_suspend_test().
 * Every access in this file happens with suspended_mutex held.
 */
static bool acct_gather_suspended = false;
static pthread_mutex_t suspended_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Held around the per-plugin conf_set/conf_values calls and while
 * acct_gather_write_conf() reads acct_gather_options_buf.
 */
static pthread_mutex_t conf_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Packed acct_gather.conf options: produced by s_p_pack_hashtbl() in
 * acct_gather_conf_init() or filled from a file descriptor in
 * acct_gather_read_conf(); released in acct_gather_conf_destroy().
 */
static buf_t *acct_gather_options_buf = NULL;
/*
 * Set by acct_gather_conf_init() and acct_gather_read_conf(), cleared by
 * acct_gather_conf_destroy(); makes repeated init/destroy calls no-ops.
 */
static bool inited = 0;
/*
 * Parse the base-10 integer at the start of my_str.
 *
 * Anything following the number is ignored, so "30,energy=10" yields 30.
 * The caller's errno is left untouched.
 *
 * IN my_str - string to parse, may be NULL
 * RET the parsed value, or -1 when my_str is NULL, does not start with a
 *     number, or holds a number that does not fit in an int
 */
static int _get_int(const char *my_str)
{
	char *end = NULL;
	long value;
	int saved_errno;
	int out_of_range;

	if (!my_str)
		return -1;

	/* strtol() only reports overflow through errno, so isolate it */
	saved_errno = errno;
	errno = 0;
	value = strtol(my_str, &end, 10);
	out_of_range = (errno == ERANGE);
	errno = saved_errno;

	/* means no numbers */
	if (my_str == end)
		return -1;

	/*
	 * strtol() works on long; never truncate silently into the int we
	 * hand back (e.g. "4294967297" must not turn into 1).
	 */
	if (out_of_range || (value > INT_MAX) || (value < INT_MIN))
		return -1;

	return (int) value;
}
/*
 * Pass the acct_gather.conf hash table to the energy, profile, interconnect
 * and filesystem plugin interfaces, in that order, while holding conf_mutex.
 *
 * IN tbl - table handed to each *_g_conf_set() call
 * RET sum of the values returned by the four *_g_conf_set() calls
 */
static int _process_tbl(s_p_hashtbl_t *tbl)
{
	int energy_rc, profile_rc, interconnect_rc, filesystem_rc;

	slurm_mutex_lock(&conf_mutex);

	/* handle acct_gather.conf in each plugin */
	energy_rc = acct_gather_energy_g_conf_set(tbl);
	profile_rc = acct_gather_profile_g_conf_set(tbl);
	interconnect_rc = acct_gather_interconnect_g_conf_set(tbl);
	filesystem_rc = acct_gather_filesystem_g_conf_set(tbl);
	/*********************************************************************/
	/* ADD MORE HERE AND FREE MEMORY IN acct_gather_conf_destroy() BELOW */
	/*********************************************************************/

	slurm_mutex_unlock(&conf_mutex);

	return energy_rc + profile_rc + interconnect_rc + filesystem_rc;
}
extern int acct_gather_conf_init(void)
{
s_p_hashtbl_t *tbl = NULL;
char *conf_path = NULL;
s_p_options_t *full_options = NULL;
int full_options_cnt = 0, i;
struct stat buf;
int rc = SLURM_SUCCESS;
if (inited)
return SLURM_SUCCESS;
inited = 1;
/* initialize all the plugins */
rc += acct_gather_energy_init();
rc += acct_gather_profile_init();
rc += acct_gather_interconnect_init();
rc += acct_gather_filesystem_init();
if (rc)
return rc;
/* get options from plugins using acct_gather.conf */
rc += acct_gather_energy_g_conf_options(&full_options,
&full_options_cnt);
rc += acct_gather_profile_g_conf_options(&full_options,
&full_options_cnt);
rc += acct_gather_interconnect_g_conf_options(&full_options,
&full_options_cnt);
rc += acct_gather_filesystem_g_conf_options(&full_options,
&full_options_cnt);
/* ADD MORE HERE */
/* for the NULL at the end */
xrealloc(full_options,
((full_options_cnt + 1) * sizeof(s_p_options_t)));
tbl = s_p_hashtbl_create(full_options);
/**************************************************/
/* Get the acct_gather.conf path and validate the file */
conf_path = get_extra_conf_path("acct_gather.conf");
if ((conf_path == NULL) || (stat(conf_path, &buf) == -1)) {
debug2("No acct_gather.conf file (%s)", conf_path);
} else {
debug2("Reading acct_gather.conf file %s", conf_path);
if (s_p_parse_file(tbl, NULL, conf_path, 0, NULL) ==
SLURM_ERROR) {
fatal("Could not open/read/parse acct_gather.conf file "
"%s. Many times this is because you have "
"defined options for plugins that are not "
"loaded. Please check your slurm.conf file "
"and make sure the plugins for the options "
"listed are loaded.",
conf_path);
}
}
rc += _process_tbl(tbl);
acct_gather_options_buf = s_p_pack_hashtbl(
tbl, full_options, full_options_cnt);
for (i=0; i<full_options_cnt; i++)
xfree(full_options[i].key);
xfree(full_options);
xfree(conf_path);
s_p_hashtbl_destroy(tbl);
return rc;
}
/*
 * Send the packed acct_gather.conf options over a file descriptor.
 *
 * The stream consists of an int holding the buffer offset followed by that
 * many bytes of buffer data; acct_gather_read_conf() consumes this layout.
 * acct_gather_conf_init() is called first so the buffer gets built if that
 * has not happened yet.
 *
 * IN fd - descriptor to write to
 * RET 0 on success, -1 if a write fails or there is no packed buffer to send
 */
extern int acct_gather_write_conf(int fd)
{
	int len;

	(void) acct_gather_conf_init();

	slurm_mutex_lock(&conf_mutex);

	/*
	 * acct_gather_conf_init() returns before packing anything when a
	 * plugin fails to initialize, leaving the buffer NULL. Report failure
	 * instead of dereferencing it.
	 */
	if (!acct_gather_options_buf)
		goto rwfail;

	len = get_buf_offset(acct_gather_options_buf);
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(acct_gather_options_buf), len);
	slurm_mutex_unlock(&conf_mutex);

	return 0;

rwfail:
	slurm_mutex_unlock(&conf_mutex);
	return -1;
}
extern int acct_gather_read_conf(int fd)
{
int len;
s_p_hashtbl_t *tbl;
safe_read(fd, &len, sizeof(int));
acct_gather_options_buf = init_buf(len);
safe_read(fd, acct_gather_options_buf->head, len);
if (!(tbl = s_p_unpack_hashtbl(acct_gather_options_buf)))
return SLURM_ERROR;
/*
* We need to set inited before calling _process_tbl or we will get
* deadlock since the other acct_gather_* plugins will call
* acct_gather_init().
*/
inited = true;
(void)_process_tbl(tbl);
s_p_hashtbl_destroy(tbl);
return SLURM_SUCCESS;
rwfail:
return SLURM_ERROR;
}
/*
 * Shut down the acct_gather plugin interfaces and release the packed
 * configuration buffer.
 *
 * Does nothing when the module is not initialized. All four *_fini() calls
 * are always made, even if an earlier one fails.
 *
 * conf_mutex is intentionally NOT destroyed: it is statically initialized
 * and this file never re-initializes it, yet clearing "inited" allows
 * acct_gather_conf_init() (and acct_gather_conf_values()) to lock it again
 * later. Locking a destroyed mutex is undefined behavior.
 *
 * RET SLURM_SUCCESS, or SLURM_ERROR if any *_fini() call did not return
 *     SLURM_SUCCESS
 */
extern int acct_gather_conf_destroy(void)
{
	int rc = SLURM_SUCCESS;

	if (!inited)
		return SLURM_SUCCESS;
	inited = false;

	if (acct_gather_energy_fini() != SLURM_SUCCESS)
		rc = SLURM_ERROR;
	if (acct_gather_filesystem_fini() != SLURM_SUCCESS)
		rc = SLURM_ERROR;
	if (acct_gather_interconnect_fini() != SLURM_SUCCESS)
		rc = SLURM_ERROR;
	if (acct_gather_profile_fini() != SLURM_SUCCESS)
		rc = SLURM_ERROR;

	FREE_NULL_BUFFER(acct_gather_options_buf);

	return rc;
}
/*
 * Build a sorted list of the acct_gather.conf values known to the plugins.
 *
 * Each plugin interface appends its entries while conf_mutex is held; the
 * list is then sorted with sort_key_pairs.
 *
 * RET newly created list (created with destroy_config_key_pair as its
 *     element destructor)
 */
extern list_t *acct_gather_conf_values(void)
{
	list_t *values = list_create(destroy_config_key_pair);

	slurm_mutex_lock(&conf_mutex);

	/* get acct_gather.conf in each plugin */
	acct_gather_profile_g_conf_values(&values);
	acct_gather_interconnect_g_conf_values(&values);
	acct_gather_energy_g_conf_values(&values);
	acct_gather_filesystem_g_conf_values(&values);
	/* ADD MORE HERE */

	slurm_mutex_unlock(&conf_mutex);

	list_sort(values, (ListCmpF) sort_key_pairs);

	return values;
}
/*
 * Extract the frequency for one profile type from a frequency string.
 *
 * The string is searched case-insensitively for "energy=", "task=",
 * "filesystem=" or "network=" (depending on type) and the integer that
 * follows the key is returned. For PROFILE_TASK a string that starts with a
 * bare number is accepted first, for backwards compatibility with the time
 * when the frequency only applied to tasks.
 *
 * IN type - PROFILE_ENERGY, PROFILE_TASK, PROFILE_FILESYSTEM or
 *           PROFILE_NETWORK; any other value is fatal
 * IN freq - frequency string, may be NULL
 * RET the parsed frequency, or -1 if freq is NULL or holds no value for type
 */
extern int acct_gather_parse_freq(int type, char *freq)
{
	char *match = NULL;
	int legacy_freq;

	if (!freq)
		return -1;

	switch (type) {
	case PROFILE_ENERGY:
		match = xstrcasestr(freq, "energy=");
		if (match)
			return _get_int(match + (sizeof("energy=") - 1));
		return -1;
	case PROFILE_TASK:
		/* backwards compatibility for when the freq was only
		   for task.
		*/
		legacy_freq = _get_int(freq);
		if (legacy_freq != -1)
			return legacy_freq;
		match = xstrcasestr(freq, "task=");
		if (match)
			return _get_int(match + (sizeof("task=") - 1));
		return -1;
	case PROFILE_FILESYSTEM:
		match = xstrcasestr(freq, "filesystem=");
		if (match)
			return _get_int(match + (sizeof("filesystem=") - 1));
		return -1;
	case PROFILE_NETWORK:
		match = xstrcasestr(freq, "network=");
		if (match)
			return _get_int(match + (sizeof("network=") - 1));
		return -1;
	default:
		fatal("Unhandled profile option %d please update "
		      "slurm_acct_gather.c "
		      "(acct_gather_parse_freq)", type);
	}

	return -1;
}
/*
 * Validate a requested task accounting frequency against the configured one.
 *
 * The configured task frequency is parsed from
 * slurm_conf.job_acct_gather_freq on the first call and cached in a static
 * variable. No check is made when job_mem_lim is zero, when the cached
 * frequency is zero, or when acctg_freq carries no task frequency.
 *
 * IN job_mem_lim - job memory limit; zero disables the check
 * IN acctg_freq - requested frequency string, parsed with
 *                 acct_gather_parse_freq(PROFILE_TASK, ...)
 * RET 0 if the request is acceptable, 1 (with errno set to
 *     ESLURMD_INVALID_ACCT_FREQ) if it is zero or greater than the
 *     configured frequency
 */
extern int acct_gather_check_acct_freq_task(uint64_t job_mem_lim,
					    char *acctg_freq)
{
	static uint32_t acct_freq_task = NO_VAL;
	int task_freq;

	if (acct_freq_task == NO_VAL) {
		int conf_freq = acct_gather_parse_freq(
			PROFILE_TASK, slurm_conf.job_acct_gather_freq);

		/* If the value is -1 lets set the freq to something
		   really high so we don't check this again.
		*/
		if (conf_freq == -1)
			acct_freq_task = NO_VAL16;
		else
			acct_freq_task = conf_freq;
	}

	if ((job_mem_lim == 0) || (acct_freq_task == 0))
		return 0;

	task_freq = acct_gather_parse_freq(PROFILE_TASK, acctg_freq);

	/* nothing requested for tasks, nothing to validate */
	if (task_freq == -1)
		return 0;

	if (task_freq == 0) {
		error("Can't turn accounting frequency off.  "
		      "We need it to monitor memory usage.");
	} else if (task_freq > acct_freq_task) {
		error("Can't set frequency to %d, it is higher than %u.  "
		      "We need it to be at least at this level to "
		      "monitor memory usage.",
		      task_freq, acct_freq_task);
	} else {
		return 0;
	}

	/* both rejection cases report the same error code */
	errno = ESLURMD_INVALID_ACCT_FREQ;
	return 1;
}
/*
 * Raise the suspend flag: sets acct_gather_suspended to true while holding
 * suspended_mutex. The flag is reported by acct_gather_suspend_test();
 * presumably pollers consult that to skip work - verify against callers.
 */
extern void acct_gather_suspend_poll(void)
{
slurm_mutex_lock(&suspended_mutex);
acct_gather_suspended = true;
slurm_mutex_unlock(&suspended_mutex);
}
/*
 * Clear the suspend flag: sets acct_gather_suspended back to false while
 * holding suspended_mutex, undoing acct_gather_suspend_poll().
 */
extern void acct_gather_resume_poll(void)
{
slurm_mutex_lock(&suspended_mutex);
acct_gather_suspended = false;
slurm_mutex_unlock(&suspended_mutex);
}
/*
 * Report the current state of the suspend flag.
 *
 * RET true if acct_gather_suspend_poll() was the most recent of the
 *     suspend/resume calls, false otherwise (including before any call)
 */
extern bool acct_gather_suspend_test(void)
{
	bool is_suspended;

	/* take a snapshot under the lock, return it after releasing */
	slurm_mutex_lock(&suspended_mutex);
	is_suspended = acct_gather_suspended;
	slurm_mutex_unlock(&suspended_mutex);

	return is_suspended;
}