| /*****************************************************************************\ |
| * slurm_acct_gather.c - generic interface needed for some |
| * acct_gather plugins. |
| ***************************************************************************** |
| * Copyright (C) SchedMD LLC. |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <sys/stat.h>
| |
| #include "acct_gather.h" |
| #include "acct_gather_energy.h" |
| #include "acct_gather_interconnect.h" |
| #include "acct_gather_filesystem.h" |
| |
| #include "src/common/pack.h" |
| #include "src/common/parse_config.h" |
| #include "src/common/xstring.h" |
| |
| static bool acct_gather_suspended = false; |
| static pthread_mutex_t suspended_mutex = PTHREAD_MUTEX_INITIALIZER; |
| static pthread_mutex_t conf_mutex = PTHREAD_MUTEX_INITIALIZER; |
| static buf_t *acct_gather_options_buf = NULL; |
| static bool inited = 0; |
| |
/*
 * Parse the base-10 integer found at the start of a string.
 *
 * Parsing follows strtol(): leading whitespace and an optional sign are
 * accepted and anything after the digits is ignored.
 *
 * IN my_str - string to parse, may be NULL
 * RET the parsed value, or -1 if my_str is NULL, contains no number, or the
 *	number does not fit in an int. The caller's errno is left untouched.
 */
static int _get_int(const char *my_str)
{
	char *end = NULL;
	long value;
	int saved_errno;
	int out_of_range;

	if (!my_str)
		return -1;

	/*
	 * strtol() only reports overflow of long through errno, so clear it
	 * for the call and put the caller's value back afterwards.
	 */
	saved_errno = errno;
	errno = 0;
	value = strtol(my_str, &end, 10);
	out_of_range = (errno == ERANGE);
	errno = saved_errno;

	/* means no numbers */
	if (my_str == end)
		return -1;

	/* Refuse values that would be silently truncated by the int return */
	if (out_of_range || (value < INT_MIN) || (value > INT_MAX))
		return -1;

	return (int) value;
}
| |
| static int _process_tbl(s_p_hashtbl_t *tbl) |
| { |
| int rc = 0; |
| |
| /* handle acct_gather.conf in each plugin */ |
| slurm_mutex_lock(&conf_mutex); |
| rc += acct_gather_energy_g_conf_set(tbl); |
| rc += acct_gather_profile_g_conf_set(tbl); |
| rc += acct_gather_interconnect_g_conf_set(tbl); |
| rc += acct_gather_filesystem_g_conf_set(tbl); |
| /*********************************************************************/ |
| /* ADD MORE HERE AND FREE MEMORY IN acct_gather_conf_destroy() BELOW */ |
| /*********************************************************************/ |
| slurm_mutex_unlock(&conf_mutex); |
| |
| return rc; |
| } |
| |
| extern int acct_gather_conf_init(void) |
| { |
| s_p_hashtbl_t *tbl = NULL; |
| char *conf_path = NULL; |
| s_p_options_t *full_options = NULL; |
| int full_options_cnt = 0, i; |
| struct stat buf; |
| int rc = SLURM_SUCCESS; |
| |
| if (inited) |
| return SLURM_SUCCESS; |
| inited = 1; |
| |
| /* initialize all the plugins */ |
| rc += acct_gather_energy_init(); |
| rc += acct_gather_profile_init(); |
| rc += acct_gather_interconnect_init(); |
| rc += acct_gather_filesystem_init(); |
| |
| if (rc) |
| return rc; |
| |
| /* get options from plugins using acct_gather.conf */ |
| |
| rc += acct_gather_energy_g_conf_options(&full_options, |
| &full_options_cnt); |
| rc += acct_gather_profile_g_conf_options(&full_options, |
| &full_options_cnt); |
| rc += acct_gather_interconnect_g_conf_options(&full_options, |
| &full_options_cnt); |
| rc += acct_gather_filesystem_g_conf_options(&full_options, |
| &full_options_cnt); |
| /* ADD MORE HERE */ |
| |
| /* for the NULL at the end */ |
| xrealloc(full_options, |
| ((full_options_cnt + 1) * sizeof(s_p_options_t))); |
| |
| tbl = s_p_hashtbl_create(full_options); |
| |
| /**************************************************/ |
| |
| /* Get the acct_gather.conf path and validate the file */ |
| conf_path = get_extra_conf_path("acct_gather.conf"); |
| if ((conf_path == NULL) || (stat(conf_path, &buf) == -1)) { |
| debug2("No acct_gather.conf file (%s)", conf_path); |
| } else { |
| debug2("Reading acct_gather.conf file %s", conf_path); |
| |
| if (s_p_parse_file(tbl, NULL, conf_path, 0, NULL) == |
| SLURM_ERROR) { |
| fatal("Could not open/read/parse acct_gather.conf file " |
| "%s. Many times this is because you have " |
| "defined options for plugins that are not " |
| "loaded. Please check your slurm.conf file " |
| "and make sure the plugins for the options " |
| "listed are loaded.", |
| conf_path); |
| } |
| } |
| |
| rc += _process_tbl(tbl); |
| |
| acct_gather_options_buf = s_p_pack_hashtbl( |
| tbl, full_options, full_options_cnt); |
| |
| for (i=0; i<full_options_cnt; i++) |
| xfree(full_options[i].key); |
| xfree(full_options); |
| xfree(conf_path); |
| |
| s_p_hashtbl_destroy(tbl); |
| |
| return rc; |
| } |
| |
| extern int acct_gather_write_conf(int fd) |
| { |
| int len; |
| |
| acct_gather_conf_init(); |
| |
| slurm_mutex_lock(&conf_mutex); |
| len = get_buf_offset(acct_gather_options_buf); |
| safe_write(fd, &len, sizeof(int)); |
| safe_write(fd, get_buf_data(acct_gather_options_buf), len); |
| slurm_mutex_unlock(&conf_mutex); |
| |
| return 0; |
| |
| rwfail: |
| slurm_mutex_unlock(&conf_mutex); |
| return -1; |
| } |
| |
| extern int acct_gather_read_conf(int fd) |
| { |
| int len; |
| s_p_hashtbl_t *tbl; |
| |
| safe_read(fd, &len, sizeof(int)); |
| |
| acct_gather_options_buf = init_buf(len); |
| safe_read(fd, acct_gather_options_buf->head, len); |
| |
| if (!(tbl = s_p_unpack_hashtbl(acct_gather_options_buf))) |
| return SLURM_ERROR; |
| |
| /* |
| * We need to set inited before calling _process_tbl or we will get |
| * deadlock since the other acct_gather_* plugins will call |
| * acct_gather_init(). |
| */ |
| inited = true; |
| (void)_process_tbl(tbl); |
| |
| s_p_hashtbl_destroy(tbl); |
| |
| return SLURM_SUCCESS; |
| rwfail: |
| return SLURM_ERROR; |
| } |
| |
| extern int acct_gather_conf_destroy(void) |
| { |
| int rc = SLURM_SUCCESS; |
| |
| if (!inited) |
| return SLURM_SUCCESS; |
| |
| inited = false; |
| |
| if (acct_gather_energy_fini() != SLURM_SUCCESS) |
| rc = SLURM_ERROR; |
| |
| if (acct_gather_filesystem_fini() != SLURM_SUCCESS) |
| rc = SLURM_ERROR; |
| |
| if (acct_gather_interconnect_fini() != SLURM_SUCCESS) |
| rc = SLURM_ERROR; |
| |
| if (acct_gather_profile_fini() != SLURM_SUCCESS) |
| rc = SLURM_ERROR; |
| |
| FREE_NULL_BUFFER(acct_gather_options_buf); |
| |
| slurm_mutex_destroy(&conf_mutex); |
| return rc; |
| } |
| |
| extern list_t *acct_gather_conf_values(void) |
| { |
| list_t *acct_list = list_create(destroy_config_key_pair); |
| |
| /* get acct_gather.conf in each plugin */ |
| slurm_mutex_lock(&conf_mutex); |
| acct_gather_profile_g_conf_values(&acct_list); |
| acct_gather_interconnect_g_conf_values(&acct_list); |
| acct_gather_energy_g_conf_values(&acct_list); |
| acct_gather_filesystem_g_conf_values(&acct_list); |
| /* ADD MORE HERE */ |
| slurm_mutex_unlock(&conf_mutex); |
| /******************************************/ |
| |
| list_sort(acct_list, (ListCmpF) sort_key_pairs); |
| |
| return acct_list; |
| } |
| |
| extern int acct_gather_parse_freq(int type, char *freq) |
| { |
| int freq_int = -1; |
| char *sub_str = NULL; |
| |
| if (!freq) |
| return freq_int; |
| |
| switch (type) { |
| case PROFILE_ENERGY: |
| if ((sub_str = xstrcasestr(freq, "energy="))) |
| freq_int = _get_int(sub_str + 7); |
| break; |
| case PROFILE_TASK: |
| /* backwards compatibility for when the freq was only |
| for task. |
| */ |
| freq_int = _get_int(freq); |
| if ((freq_int == -1) |
| && (sub_str = xstrcasestr(freq, "task="))) |
| freq_int = _get_int(sub_str + 5); |
| break; |
| case PROFILE_FILESYSTEM: |
| if ((sub_str = xstrcasestr(freq, "filesystem="))) |
| freq_int = _get_int(sub_str + 11); |
| break; |
| case PROFILE_NETWORK: |
| if ((sub_str = xstrcasestr(freq, "network="))) |
| freq_int = _get_int(sub_str + 8); |
| break; |
| default: |
| fatal("Unhandled profile option %d please update " |
| "slurm_acct_gather.c " |
| "(acct_gather_parse_freq)", type); |
| } |
| |
| return freq_int; |
| } |
| |
| extern int acct_gather_check_acct_freq_task(uint64_t job_mem_lim, |
| char *acctg_freq) |
| { |
| int task_freq; |
| static uint32_t acct_freq_task = NO_VAL; |
| |
| if (acct_freq_task == NO_VAL) { |
| int i = acct_gather_parse_freq(PROFILE_TASK, |
| slurm_conf.job_acct_gather_freq); |
| |
| /* If the value is -1 lets set the freq to something |
| really high so we don't check this again. |
| */ |
| if (i == -1) |
| acct_freq_task = NO_VAL16; |
| else |
| acct_freq_task = i; |
| } |
| |
| if (!job_mem_lim || !acct_freq_task) |
| return 0; |
| |
| task_freq = acct_gather_parse_freq(PROFILE_TASK, acctg_freq); |
| |
| if (task_freq == -1) |
| return 0; |
| |
| if (task_freq == 0) { |
| error("Can't turn accounting frequency off. " |
| "We need it to monitor memory usage."); |
| errno = ESLURMD_INVALID_ACCT_FREQ; |
| return 1; |
| } else if (task_freq > acct_freq_task) { |
| error("Can't set frequency to %d, it is higher than %u. " |
| "We need it to be at least at this level to " |
| "monitor memory usage.", |
| task_freq, acct_freq_task); |
| errno = ESLURMD_INVALID_ACCT_FREQ; |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| extern void acct_gather_suspend_poll(void) |
| { |
| slurm_mutex_lock(&suspended_mutex); |
| acct_gather_suspended = true; |
| slurm_mutex_unlock(&suspended_mutex); |
| } |
| |
| extern void acct_gather_resume_poll(void) |
| { |
| slurm_mutex_lock(&suspended_mutex); |
| acct_gather_suspended = false; |
| slurm_mutex_unlock(&suspended_mutex); |
| } |
| |
| extern bool acct_gather_suspend_test(void) |
| { |
| bool rc; |
| slurm_mutex_lock(&suspended_mutex); |
| rc = acct_gather_suspended; |
| slurm_mutex_unlock(&suspended_mutex); |
| return rc; |
| } |