blob: 20ecf949e357c06a38be65af9cca1e00c5c84935 [file] [edit]
/*****************************************************************************\
* openmetrics.c - OpenMetrics plugin source file
*****************************************************************************
* Copyright (C) SchedMD LLC.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#define _GNU_SOURCE
#include "openmetrics.h"
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
#include "src/common/list.h"
#include "src/common/log.h"
#include "src/common/xassert.h"
#include "src/common/xhash.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/interfaces/metrics.h"
#include "src/slurmctld/statistics.h"
/* Human-readable plugin name; printed by init() and fini() below. */
const char plugin_name[] = "OpenMetrics plugin";
/* Plugin type string; also recorded in every metric set this file creates. */
const char plugin_type[] = "metrics/openmetrics";
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
/*
 * Tag written into metric_set_t.plugin_id by _metrics_new_set() and verified
 * by _check_set() to reject sets that were not created by this plugin.
 */
#define PLUGIN_ID 0xcafebeef
/*
 * Plugin-private state of a metric set; stored in metric_set_t.arg.
 *
 * full_hash is initialised with a destructor that frees the metrics, while
 * the lists held by name_hash are created without an element destructor, so
 * each metric is released only once (through full_hash).
 */
typedef struct openmetrics_set {
xhash_t *full_hash; /* metrics table exact {name,[key,val]*} lookup */
xhash_t *name_hash; /* table of lists of metrics with the same name */
} openmetrics_set_t;
/* Arguments handed to the dump callbacks while walking the metric tables. */
typedef struct foreach_dump_metric_args {
char **str; /* address of the output string being built */
char **pos; /* address of the append cursor passed to xstrfmtcatat() */
} foreach_dump_metric_args_t;
/* Arguments handed to _ua_stats_to_metric() for every user/account entry. */
typedef struct foreach_stats_parse_metric {
char *str; /* label key attached to each metric (e.g. "username") */
char *pfx; /* prefix inserted into the metric name after "slurm_" */
metric_set_t *set; /* set receiving the new metrics */
} foreach_stats_parse_metric_t;
// clang-format off
#define ADD_METRIC_KEYVAL_PFX(set, type, data, pfx, name, desc, otype, key, val) \
_metrics_create_kv(set, DATA_PARSER_##type, (void *) &(data), \
sizeof(data), pfx, XSTRINGIFY(name), desc, \
METRIC_TYPE_##otype, key, val)
#define ADD_METRIC_KEYVAL(set, type, data, name, desc, otype, key, val) \
_metrics_create_kv(set, DATA_PARSER_##type, (void *) &(data), \
sizeof(data), NULL, "slurm_" XSTRINGIFY(name), \
desc, METRIC_TYPE_##otype, key, val)
#define ADD_METRIC(set, type, data, name, desc, otype) \
_metrics_create_kv(set, DATA_PARSER_##type, (void *) &(data), \
sizeof(data), NULL, "slurm_" XSTRINGIFY(name), \
desc, METRIC_TYPE_##otype, NULL, NULL)
// clang-format on
/* Plugin load hook: only logs a debug message and reports success. */
extern int init(void)
{
debug("loading %s", plugin_name);
return SLURM_SUCCESS;
}
/* Plugin unload hook: only logs a debug message. */
extern void fini(void)
{
debug("unloading %s", plugin_name);
}
/*
 * Release the plugin-private part of a metric set.
 *
 * The by-name table is released before the full table; the per-name lists
 * are created without an element destructor, so the metrics themselves are
 * released through full_hash.
 */
static void _free_openmetrics_set(openmetrics_set_t *ometrics_set_ptr)
{
xassert(ometrics_set_ptr);
xhash_free_ptr(&ometrics_set_ptr->name_hash);
xhash_free_ptr(&ometrics_set_ptr->full_hash);
xfree(ometrics_set_ptr);
}
/*
 * Build the exact-match id of a metric: its name followed by one
 * ":key=val" segment per label.
 *
 * IN name - metric name
 * IN keyval - label array terminated by an entry whose key is NULL; may be
 *	NULL (or start with a NULL pointer) when the metric has no labels
 * RET newly built string; callers in this file release it with xfree()
 */
static char *_make_hash_id(char *name, metric_keyval_t **keyval)
{
	char *id = NULL, *end = NULL;

	xstrfmtcatat(id, &end, "%s", name);

	/* Unlabelled metric: the name alone is the id. */
	if (!keyval || !*keyval)
		return id;

	for (metric_keyval_t **kv = keyval; (*kv)->key; kv++)
		xstrfmtcatat(id, &end, ":%s=%s", (*kv)->key, (*kv)->val);

	return id;
}
/*
 * Item destructor registered for name_hash: destroys one per-name list.
 * The lists are created without an element destructor, so the metrics they
 * reference are left untouched here.
 */
static void _free_xhash_name_list(void *item)
{
	list_t *name_list = item;

	FREE_NULL_LIST(name_list);
}
/* Item destructor registered for full_hash: releases one metric. */
static void _free_xhash_full(void *item)
{
	metric_t *metric = item;

	metrics_free_metric(metric);
}
/*
 * Key extractor registered for name_hash. Each item is a list of metrics
 * sharing one name, so the name of the first element is used as the key.
 */
static void _make_xhash_id_name(void *item, const char **key, uint32_t *key_len)
{
	metric_t *head = list_peek((list_t *) item);

	*key = head->name;
	*key_len = strlen(head->name);
}
/* Key extractor registered for full_hash: the key is the metric's id. */
static void _make_xhash_id(void *item, const char **key, uint32_t *key_len)
{
	metric_t *metric = item;

	*key = metric->id;
	*key_len = strlen(metric->id);
}
/*
 * Allocate the plugin-private state of a metric set and create both lookup
 * tables: full_hash (keyed by metric id, frees the metrics) and name_hash
 * (keyed by metric name, frees only the per-name lists).
 *
 * Declared with (void) so the definition is a real prototype, matching the
 * other parameterless functions in this file.
 */
static openmetrics_set_t *_init_ometrics_set(void)
{
	openmetrics_set_t *ometrics_set = xmalloc(sizeof(*ometrics_set));

	ometrics_set->full_hash = xhash_init(_make_xhash_id, _free_xhash_full);
	ometrics_set->name_hash =
		xhash_init(_make_xhash_id_name, _free_xhash_name_list);

	return ometrics_set;
}
/*
 * Validate that a metric set was created by this plugin.
 *
 * RET the plugin-private state stored in set->arg, or NULL (after logging an
 *	error) when set is NULL or carries a different plugin id
 */
static openmetrics_set_t *_check_set(metric_set_t *set)
{
	if (set && (set->plugin_id == PLUGIN_ID))
		return (openmetrics_set_t *) set->arg;

	error("%s: invalid namespace", __func__);
	return NULL;
}
/*
 * Destroy a metric set created by this plugin.
 *
 * A NULL set is accepted and treated as success. A set that does not pass
 * _check_set() is left untouched and SLURM_ERROR is returned.
 */
extern int metrics_p_free_set(metric_set_t *set)
{
	openmetrics_set_t *priv;

	if (!set)
		return SLURM_SUCCESS;

	priv = _check_set(set);
	if (!priv)
		return SLURM_ERROR;

	_free_openmetrics_set(priv);

	/* Clear the identity fields before the container is released. */
	set->plugin_id = 0;
	set->plugin_type = "";
	xfree(set);

	return SLURM_SUCCESS;
}
static metric_set_t *_metrics_new_set(void)
{
metric_set_t *set = xmalloc(sizeof(*set));
openmetrics_set_t *ometrics_set = _init_ometrics_set();
*set = (metric_set_t) {
.plugin_id = PLUGIN_ID,
.arg = (void *) ometrics_set,
.plugin_type = plugin_type,
};
return set;
}
/*
 * Insert a metric into both lookup tables of a set.
 *
 * IN set - destination set; must have been created by this plugin
 * IN m - metric to insert; on success it is owned by the set's full_hash
 * RET SLURM_SUCCESS, or SLURM_ERROR when the set is invalid or a metric with
 *	the same name and labels is already present (m is not inserted)
 */
static int _metrics_add(metric_set_t *set, metric_t *m)
{
	openmetrics_set_t *priv = _check_set(set);
	list_t *same_name;
	char *id;

	if (!priv)
		return SLURM_ERROR;

	id = _make_hash_id(m->name, m->keyval);
	if (xhash_get_str(priv->full_hash, id)) {
		error("Duplicate key when adding metric: %s", m->name);
		xfree(id);
		return SLURM_ERROR;
	}

	/* Hand the id over to the metric unless it already carries one. */
	if (m->id)
		xfree(id);
	else
		m->id = id;

	xhash_add(priv->full_hash, m);

	same_name = xhash_get_str(priv->name_hash, m->name);
	if (same_name) {
		list_append(same_name, m);
		return SLURM_SUCCESS;
	}

	/* First metric with this name: start a new list (no destructor). */
	same_name = list_create(NULL);
	list_append(same_name, m);
	xhash_add(priv->name_hash, same_name);

	return SLURM_SUCCESS;
}
/*
 * Append the textual value of a metric to the output string.
 *
 * IN/OUT str - address of the output string
 * IN/OUT p - append cursor used by xstrfmtcatat()
 * IN m - metric whose data is interpreted according to m->type; unknown
 *	types are rendered as "NaN"
 *
 * Each value is loaded through a pointer to its actual type and then cast to
 * the type the conversion specifier expects. This avoids reading uint64_t or
 * int64_t storage through (unsigned) long long pointers, and avoids assuming
 * that time_t is a long.
 */
static void _dump_metric_value(char **str, char **p, const metric_t *m)
{
	switch (m->type) {
	case DATA_PARSER_UINT16:
		xstrfmtcatat(*str, p, "%hu", *(uint16_t *) m->data);
		break;
	case DATA_PARSER_UINT32:
		xstrfmtcatat(*str, p, "%u", *(uint32_t *) m->data);
		break;
	case DATA_PARSER_UINT64:
		xstrfmtcatat(*str, p, "%llu",
			     (unsigned long long) *(uint64_t *) m->data);
		break;
	case DATA_PARSER_TIMESTAMP:
		xstrfmtcatat(*str, p, "%lld",
			     (long long) *(time_t *) m->data);
		break;
	case DATA_PARSER_INT32:
		xstrfmtcatat(*str, p, "%d", *(int32_t *) m->data);
		break;
	case DATA_PARSER_INT64:
		xstrfmtcatat(*str, p, "%lld",
			     (long long) *(int64_t *) m->data);
		break;
	case DATA_PARSER_FLOAT64:
		xstrfmtcatat(*str, p, "%lf", *(double *) m->data);
		break;
	case DATA_PARSER_FLOAT128:
		xstrfmtcatat(*str, p, "%Lf", *(long double *) m->data);
		break;
	default:
		xstrfmtcatat(*str, p, "NaN");
		break;
	}
}
static int _dump_metric_no_desc(void *x, void *arg)
{
metric_t *m = x;
char **str = ((foreach_dump_metric_args_t *) arg)->str;
char **p = ((foreach_dump_metric_args_t *) arg)->pos;
metric_keyval_t *kv;
if (!m->keyval) {
xstrfmtcatat(*str, p, "%s ", m->name);
_dump_metric_value(str, p, m);
xstrfmtcatat(*str, p, "\n");
return SLURM_SUCCESS;
}
xstrfmtcatat(*str, p, "%s{", m->name);
for (int i = 0;; i++) {
kv = m->keyval[i];
if (!kv->key)
break;
xstrfmtcatat(*str, p, "%s=\"%s\"", kv->key, kv->val);
if (m->keyval[i + 1] && m->keyval[i + 1]->key)
xstrfmtcatat(*str, p, ",");
}
xstrfmtcatat(*str, p, "} ");
_dump_metric_value(str, p, m);
xstrfmtcatat(*str, p, "\n");
return SLURM_SUCCESS;
}
/*
 * Append the "# HELP" and "# TYPE" header lines of a metric to the output.
 * The type text is looked up in openmetrics_type_str using m->attr.
 */
static void _dump_metric_desc(metric_t *m, foreach_dump_metric_args_t *arg)
{
	xstrfmtcatat(*arg->str, arg->pos, "# HELP %s %s\n", m->name, m->desc);
	xstrfmtcatat(*arg->str, arg->pos, "# TYPE %s %s\n", m->name,
		     openmetrics_type_str[m->attr]);
}
/*
 * xhash_walk() callback for name_hash: dump one family of metrics sharing a
 * name. The header is emitted once, taken from the first metric of the list,
 * followed by one sample line per metric.
 */
static void _dump_metrics_from_list(void *item, void *args)
{
	list_t *same_name = item;
	metric_t *head = list_peek(same_name);

	_dump_metric_desc(head, args);
	list_for_each_ro(same_name, _dump_metric_no_desc, args);
}
/*
 * Render every metric of a set as text into *buf.
 *
 * IN set - set created by this plugin
 * OUT buf - must be non-NULL and point to a NULL string; receives the dump
 * RET SLURM_SUCCESS, or SLURM_ERROR when the set is invalid, buf is NULL or
 *	*buf is already set
 */
extern int metrics_p_dump(metric_set_t *set, char **buf)
{
	char *cursor = NULL;
	foreach_dump_metric_args_t args = { .str = buf, .pos = &cursor };
	openmetrics_set_t *priv = _check_set(set);

	if (!priv || !buf || *buf)
		return SLURM_ERROR;

	xhash_walk(priv->name_hash, _dump_metrics_from_list, &args);

	return SLURM_SUCCESS;
}
/*
 * Create a metric and add it to a set, logging the outcome.
 *
 * IN set - destination set
 * IN type - data type used to interpret data
 * IN data - address of the value
 * IN sz_data - size of the value in bytes
 * IN pfx - optional prefix; when set the metric is named "slurm_<pfx>_<name>"
 * IN name - metric name (used as-is when pfx is NULL)
 * IN desc - help text
 * IN ometric_type - OpenMetrics type of the metric
 * IN key, val - optional single label; attached only when both are non-NULL
 *	and non-empty
 *
 * When the metric cannot be added it is released here.
 */
static void _metrics_create_kv(metric_set_t *set, data_parser_type_t type,
			       void *data, ssize_t sz_data, char *pfx,
			       char *name, char *desc,
			       openmetrics_type_t ometric_type, char *key,
			       char *val)
{
	metric_keyval_t **labels = NULL;
	char *full_name = NULL;
	metric_t *metric;
	int rc;

	if (key && val && key[0] && val[0]) {
		/* One real label followed by a terminator whose key is NULL. */
		labels = xcalloc(2, sizeof(*labels));
		labels[0] = xmalloc(sizeof(*labels[0]));
		labels[0]->key = xstrdup(key);
		labels[0]->val = xstrdup(val);
		labels[1] = xmalloc(sizeof(*labels[1]));
		labels[1]->key = NULL;
		labels[1]->val = NULL;
	}

	if (pfx) {
		xstrfmtcat(full_name, "slurm_%s_%s", pfx, name);
		name = full_name;
	}

	metric = metrics_create_metric(set, type, data, sz_data, name, desc,
				       ometric_type, labels);
	rc = _metrics_add(set, metric);

	if (rc) {
		if (key) {
			error("Cannot add metric %s{%s=%s}", name, key, val);
		} else {
			error("Cannot add metric %s", name);
		}
		metrics_free_metric(metric);
	} else {
		if (key) {
			log_flag(METRICS, "Added metric %s{%s=%s}",
				 name, key, val);
		} else {
			log_flag(METRICS, "Added metric %s", name);
		}
	}

	/* name may alias full_name, so release it only after logging. */
	xfree(full_name);
}
/*
 * Build a metric set from node statistics.
 *
 * For every non-NULL entry of stats->node_stats_table a group of per-node
 * gauges labelled node="<name>" is registered, followed by cluster-wide
 * gauges taken from the counters in stats.
 *
 * IN stats - node statistics
 * RET new metric set; release it with metrics_p_free_set()
 *
 * NOTE(review): total_node_cnt is a stack variable whose address is handed to
 * the metric; this presumably relies on metrics_create_metric() copying
 * sizeof(data) bytes rather than keeping the pointer - confirm.
 */
extern metric_set_t *metrics_p_parse_nodes_metrics(nodes_stats_t *stats)
{
	uint16_t total_node_cnt = 0;
	metric_set_t *set = _metrics_new_set();

	for (int i = 0; i < stats->node_stats_count; i++) {
		node_stats_t *n = stats->node_stats_table[i];

		/* Empty slots are skipped and not counted. */
		if (!n)
			continue;

		// clang-format off
		ADD_METRIC_KEYVAL(set, UINT16, n->cpus_total, node_cpus, "Total number of cpus in the node", GAUGE, "node", n->name);
		ADD_METRIC_KEYVAL(set, UINT16, n->cpus_alloc, node_cpus_alloc, "Allocated cpus in the node", GAUGE, "node", n->name);
		ADD_METRIC_KEYVAL(set, UINT16, n->cpus_efctv, node_cpus_effective, "CPUs allocatable to jobs not reserved for system usage", GAUGE, "node", n->name);
		ADD_METRIC_KEYVAL(set, UINT16, n->cpus_idle, node_cpus_idle, "Idle cpus in the node", GAUGE, "node", n->name);
		ADD_METRIC_KEYVAL(set, UINT64, n->mem_alloc, node_memory_alloc_bytes, "Bytes allocated to jobs in the node", GAUGE, "node", n->name);
		ADD_METRIC_KEYVAL(set, UINT64, n->mem_avail, node_memory_effective_bytes, "Memory allocatable to jobs not reserved for system usage", GAUGE, "node", n->name);
		ADD_METRIC_KEYVAL(set, UINT64, n->mem_free, node_memory_free_bytes, "Free memory in bytes of the node", GAUGE, "node", n->name);
		ADD_METRIC_KEYVAL(set, UINT64, n->mem_total, node_memory_bytes, "Total memory in bytes of the node", GAUGE, "node", n->name);
		// clang-format on
		total_node_cnt++;
	}

	// clang-format off
	ADD_METRIC(set, UINT16, total_node_cnt, nodes, "Total number of nodes", GAUGE);
	ADD_METRIC(set, UINT16, stats->alloc, nodes_alloc, "Number of nodes in Allocated state", GAUGE);
	ADD_METRIC(set, UINT16, stats->blocked, nodes_blocked, "Number of nodes in Blocked state", GAUGE);
	ADD_METRIC(set, UINT16, stats->cg, nodes_completing, "Number of nodes with Completing flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->cloud, nodes_cloud, "Number of Cloud nodes", GAUGE);
	ADD_METRIC(set, UINT16, stats->down, nodes_down, "Number of nodes in Down state", GAUGE);
	ADD_METRIC(set, UINT16, stats->drain, nodes_drain, "Number of nodes with Drain flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->drained, nodes_drained, "Number of drained nodes", GAUGE);
	ADD_METRIC(set, UINT16, stats->draining, nodes_draining, "Number of nodes in draining condition (Drain state with active jobs)", GAUGE);
	ADD_METRIC(set, UINT16, stats->dyn_future, nodes_dyn_future, "Number of future dynamic nodes", GAUGE);
	ADD_METRIC(set, UINT16, stats->dyn_normal, nodes_dyn_normal, "Number of dynamic nodes", GAUGE);
	ADD_METRIC(set, UINT16, stats->external, nodes_external, "Number of external nodes", GAUGE);
	ADD_METRIC(set, UINT16, stats->fail, nodes_fail, "Number of nodes with Fail flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->future, nodes_future, "Number of nodes in Future state", GAUGE);
	ADD_METRIC(set, UINT16, stats->idle, nodes_idle, "Number of nodes in Idle state", GAUGE);
	ADD_METRIC(set, UINT16, stats->invalid_reg, nodes_invalid_reg, "Number of nodes with Invalid Registration flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->maint, nodes_maint, "Number of nodes with Maintenance flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->mixed, nodes_mixed, "Number of nodes in Mixed state", GAUGE);
	ADD_METRIC(set, UINT16, stats->no_resp, nodes_noresp, "Number of nodes with Not Responding flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->planned, nodes_planned, "Number of nodes with Planned flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->power_down, nodes_power_down, "Number of nodes marked to be powered down", GAUGE);
	ADD_METRIC(set, UINT16, stats->power_up, nodes_power_up, "Number of nodes marked to be powered up", GAUGE);
	ADD_METRIC(set, UINT16, stats->powered_down, nodes_powered_down, "Number of nodes powered down", GAUGE);
	/*
	 * The powering-down counter used to be published under the
	 * nodes_powering_up name; each counter now feeds its own gauge.
	 * NOTE(review): assumes nodes_stats_t has a powering_up member, as
	 * partition_stats_t has nodes_powering_up - confirm in statistics.h.
	 */
	ADD_METRIC(set, UINT16, stats->powering_down, nodes_powering_down, "Number of nodes powering down", GAUGE);
	ADD_METRIC(set, UINT16, stats->powering_up, nodes_powering_up, "Number of nodes powering up", GAUGE);
	ADD_METRIC(set, UINT16, stats->reboot_issued, nodes_reboot_issued, "Number of nodes with Reboot Issued flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->reboot_requested, nodes_reboot_req, "Number of nodes with Reboot Requested flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->resv, nodes_resv, "Number of nodes with Reserved flag", GAUGE);
	ADD_METRIC(set, UINT16, stats->unknown, nodes_unknown, "Number of nodes in Unknown state", GAUGE);
	// clang-format on

	return set;
}
/*
 * Build a metric set of cluster-wide job gauges from job statistics.
 *
 * IN stats - job statistics
 * RET new metric set; release it with metrics_p_free_set()
 */
extern metric_set_t *metrics_p_parse_jobs_metrics(jobs_stats_t *stats)
{
	metric_set_t *set = _metrics_new_set();

	// clang-format off
	ADD_METRIC(set, UINT32, stats->bootfail, jobs_bootfail, "Number of jobs in BootFail state", GAUGE);
	ADD_METRIC(set, UINT32, stats->cancelled, jobs_cancelled, "Number of jobs in Cancelled state", GAUGE);
	ADD_METRIC(set, UINT32, stats->completed, jobs_completed, "Number of jobs in Completed state", GAUGE);
	ADD_METRIC(set, UINT32, stats->completing, jobs_completing, "Number of jobs in Completing state", GAUGE);
	ADD_METRIC(set, UINT32, stats->configuring, jobs_configuring, "Number of jobs in Configuring state", GAUGE);
	ADD_METRIC(set, UINT16, stats->cpus_alloc, jobs_cpus_alloc, "Total number of Cpus allocated by jobs", GAUGE);
	ADD_METRIC(set, UINT32, stats->deadline, jobs_deadline, "Number of jobs in Deadline state", GAUGE);
	ADD_METRIC(set, UINT32, stats->expediting, jobs_expediting, "Number of jobs in Expediting state", GAUGE);
	ADD_METRIC(set, UINT32, stats->failed, jobs_failed, "Number of jobs in Failed state", GAUGE);
	ADD_METRIC(set, UINT32, stats->fed_requeued, jobs_fed_requeued, "Number of jobs requeued in a federation", GAUGE);
	ADD_METRIC(set, UINT32, stats->finished, jobs_finished, "Number of finished jobs", GAUGE);
	ADD_METRIC(set, UINT32, stats->hold, jobs_hold, "Number of jobs in Hold state", GAUGE);
	ADD_METRIC(set, UINT32, stats->job_cnt, jobs, "Total number of jobs", GAUGE);
	ADD_METRIC(set, UINT64, stats->memory_alloc, jobs_memory_alloc, "Total memory bytes allocated by jobs", GAUGE);
	ADD_METRIC(set, UINT32, stats->node_failed, jobs_node_failed, "Number of jobs in Node Failed state", GAUGE);
	ADD_METRIC(set, UINT16, stats->nodes_alloc, jobs_nodes_alloc, "Total number of nodes allocated by jobs", GAUGE);
	ADD_METRIC(set, UINT32, stats->oom, jobs_outofmemory, "Number of jobs in Out of Memory state", GAUGE);
	ADD_METRIC(set, UINT32, stats->pending, jobs_pending, "Number of jobs in Pending state", GAUGE);
	ADD_METRIC(set, UINT32, stats->powerup_node, jobs_powerup_node, "Number of jobs in PowerUp Node state", GAUGE);
	ADD_METRIC(set, UINT32, stats->preempted, jobs_preempted, "Number of jobs in Preempted state", GAUGE);
	ADD_METRIC(set, UINT32, stats->requeued, jobs_requeued, "Number of jobs in Requeued state", GAUGE);
	ADD_METRIC(set, UINT32, stats->resizing, jobs_resizing, "Number of jobs in Resizing state", GAUGE);
	/* Help text previously read "Rvoked". */
	ADD_METRIC(set, UINT32, stats->revoked, jobs_revoked, "Number of jobs in Revoked state", GAUGE);
	ADD_METRIC(set, UINT32, stats->running, jobs_running, "Number of jobs in Running state", GAUGE);
	ADD_METRIC(set, UINT32, stats->signaling, jobs_signaling, "Number of jobs being signaled", GAUGE);
	ADD_METRIC(set, UINT32, stats->stageout, jobs_stageout, "Number of jobs in StageOut state", GAUGE);
	ADD_METRIC(set, UINT32, stats->started, jobs_started, "Number of started jobs", GAUGE);
	ADD_METRIC(set, UINT32, stats->suspended, jobs_suspended, "Number of jobs in Suspended state", GAUGE);
	ADD_METRIC(set, UINT32, stats->timeout, jobs_timeout, "Number of jobs in Timeout state", GAUGE);
	// clang-format on

	return set;
}
/*
 * list_for_each_ro() callback: register every gauge of one partition,
 * labelled partition="<name>".
 *
 * IN x - partition_stats_t to publish
 * IN arg - metric_set_t receiving the metrics
 * RET always SLURM_SUCCESS
 */
static int _part_stats_to_metric(void *x, void *arg)
{
	partition_stats_t *ps = x;
	metric_set_t *set = arg;

	// clang-format off
/* Local shorthand: gauge backed by ps-><member>, labelled with the partition. */
#define PART_GAUGE(dtype, member, metric, help) \
	ADD_METRIC_KEYVAL(set, dtype, ps->member, metric, help, GAUGE, "partition", ps->name)

	/* Job counters */
	PART_GAUGE(UINT32, jobs, partition_jobs, "Number of jobs in this partition");
	PART_GAUGE(UINT32, jobs_bootfail, partition_jobs_bootfail, "Number of jobs in BootFail state");
	PART_GAUGE(UINT32, jobs_cancelled, partition_jobs_cancelled, "Number of jobs in Cancelled state");
	PART_GAUGE(UINT32, jobs_completed, partition_jobs_completed, "Number of jobs in Completed state");
	PART_GAUGE(UINT32, jobs_completing, partition_jobs_completing, "Number of jobs in Completing state");
	PART_GAUGE(UINT32, jobs_configuring, partition_jobs_configuring, "Number of jobs in Configuring state");
	PART_GAUGE(UINT16, jobs_cpus_alloc, partition_jobs_cpus_alloc, "Total number of Cpus allocated by jobs");
	PART_GAUGE(UINT32, jobs_deadline, partition_jobs_deadline, "Number of jobs in Deadline state");
	PART_GAUGE(UINT32, jobs_expediting, partition_jobs_expediting, "Number of jobs in Expediting state");
	PART_GAUGE(UINT32, jobs_failed, partition_jobs_failed, "Number of jobs in Failed state");
	PART_GAUGE(UINT32, jobs_fed_requeued, partition_jobs_fed_requeued, "Number of jobs requeued in a federation");
	PART_GAUGE(UINT32, jobs_finished, partition_jobs_finished, "Number of jobs in Finished");
	PART_GAUGE(UINT32, jobs_hold, partition_jobs_hold, "Number of jobs in Hold state");
	PART_GAUGE(UINT16, jobs_max_job_nodes, partition_jobs_max_job_nodes, "Max of the max_nodes required of all pending jobs in that partition");
	PART_GAUGE(UINT16, jobs_max_job_nodes_nohold, partition_jobs_max_job_nodes_nohold, "Max of the max_nodes required of all pending jobs in that partition excluding Held jobs");
	PART_GAUGE(UINT64, jobs_memory_alloc, partition_jobs_memory_alloc, "Total memory bytes allocated by jobs");
	PART_GAUGE(UINT16, jobs_min_job_nodes, partition_jobs_min_job_nodes, "Max of the min_nodes required of all pending jobs in that partition");
	PART_GAUGE(UINT16, jobs_min_job_nodes_nohold, partition_jobs_min_job_nodes_nohold, "Max of the min_nodes required of all pending jobs in that partition excluding Held jobs");
	PART_GAUGE(UINT32, jobs_node_failed, partition_jobs_node_failed, "Number of jobs in Node Failed state");
	PART_GAUGE(UINT32, jobs_oom, partition_jobs_outofmemory, "Number of jobs in Out of Memory state");
	PART_GAUGE(UINT32, jobs_pending, partition_jobs_pending, "Number of jobs in Pending state");
	PART_GAUGE(UINT32, jobs_powerup_node, partition_jobs_powerup_node, "Number of jobs in PowerUp Node state");
	PART_GAUGE(UINT32, jobs_preempted, partition_jobs_preempted, "Number of jobs in Preempted state");
	PART_GAUGE(UINT32, jobs_requeued, partition_jobs_requeued, "Number of jobs in Requeued state");
	PART_GAUGE(UINT32, jobs_resizing, partition_jobs_resizing, "Number of jobs in Resizing state");
	PART_GAUGE(UINT32, jobs_revoked, partition_jobs_revoked, "Number of revoked jobs");
	PART_GAUGE(UINT32, jobs_running, partition_jobs_running, "Number of jobs in Running state");
	PART_GAUGE(UINT32, jobs_signaling, partition_jobs_signaling, "Number of jobs in Signaling state");
	PART_GAUGE(UINT32, jobs_stageout, partition_jobs_stageout, "Number of jobs in StageOut state");
	PART_GAUGE(UINT32, jobs_started, partition_jobs_started, "Number of jobs started");
	PART_GAUGE(UINT32, jobs_suspended, partition_jobs_suspended, "Number of jobs in Suspended state");
	PART_GAUGE(UINT32, jobs_timeout, partition_jobs_timeout, "Number of jobs in Timeout state");
	PART_GAUGE(UINT32, jobs_wait_part_node_limit, partition_jobs_wait_part_node_limit, "Jobs wait partition node limit");

	/* Node counters */
	PART_GAUGE(UINT16, nodes_alloc, partition_nodes_alloc, "Nodes allocated");
	PART_GAUGE(UINT16, nodes_blocked, partition_nodes_blocked, "Nodes blocked");
	PART_GAUGE(UINT16, nodes_cg, partition_nodes_cg, "Nodes in completing state");
	PART_GAUGE(UINT16, nodes_cloud, partition_nodes_cloud, "Cloud nodes");
	PART_GAUGE(UINT16, nodes_cpus_efctv, partition_nodes_cpus_efctv, "Number of effective CPUs on all nodes, excludes CoreSpec");
	PART_GAUGE(UINT16, nodes_cpus_idle, partition_nodes_cpus_idle, "Number of idle CPUs on all nodes");
	PART_GAUGE(UINT16, nodes_cpus_alloc, partition_nodes_cpus_alloc, "Number of allocated cpus");
	PART_GAUGE(UINT16, nodes_down, partition_nodes_down, "Nodes in Down state");
	PART_GAUGE(UINT16, nodes_drain, partition_nodes_drain, "Nodes in Drain state");
	PART_GAUGE(UINT16, nodes_drained, partition_nodes_drained, "Nodes in Drained state");
	PART_GAUGE(UINT16, nodes_draining, partition_nodes_draining, "Number of nodes in draining condition (Drain state with active jobs)");
	PART_GAUGE(UINT16, nodes_dyn_future, partition_nodes_dyn_future, "Dynamic nodes in Future state");
	PART_GAUGE(UINT16, nodes_dyn_normal, partition_nodes_dyn_normal, "Dynamic nodes");
	PART_GAUGE(UINT16, nodes_external, partition_nodes_external, "External nodes");
	PART_GAUGE(UINT16, nodes_fail, partition_nodes_fail, "Nodes in Fail state");
	PART_GAUGE(UINT16, nodes_future, partition_nodes_future, "Nodes in Future state");
	PART_GAUGE(UINT16, nodes_idle, partition_nodes_idle, "Nodes in Idle state");
	PART_GAUGE(UINT16, nodes_invalid_reg, partition_nodes_invalid_reg, "Number of nodes with Invalid Registration flag");
	PART_GAUGE(UINT16, nodes_maint, partition_nodes_maint, "Nodes in maintenance state");
	PART_GAUGE(UINT64, nodes_mem_alloc, partition_nodes_mem_alloc, "Amount of allocated memory of all nodes");
	PART_GAUGE(UINT64, nodes_mem_avail, partition_nodes_mem_avail, "Amount of available memory of all nodes");
	PART_GAUGE(UINT64, nodes_mem_free, partition_nodes_mem_free, "Amount of free memory in all nodes");
	PART_GAUGE(UINT64, nodes_mem_total, partition_nodes_mem_tot, "Total amount of memory of all nodes");
	PART_GAUGE(UINT16, nodes_mixed, partition_nodes_mixed, "Nodes in Mixed state");
	PART_GAUGE(UINT16, nodes_no_resp, partition_nodes_no_resp, "Nodes in Not Responding state");
	PART_GAUGE(UINT16, nodes_planned, partition_nodes_planned, "Nodes in Planned state");
	PART_GAUGE(UINT16, nodes_power_down, partition_nodes_power_down, "Nodes marked to Power Down");
	PART_GAUGE(UINT16, nodes_power_up, partition_nodes_power_up, "Nodes marked to Power Up");
	PART_GAUGE(UINT16, nodes_powered_down, partition_nodes_powered_down, "Powered down nodes");
	PART_GAUGE(UINT16, nodes_powering_down, partition_nodes_powering_down, "Powering down nodes");
	PART_GAUGE(UINT16, nodes_powering_up, partition_nodes_powering_up, "Powering up nodes");
	PART_GAUGE(UINT16, nodes_reboot_issued, partition_nodes_reboot_issued, "Nodes which initiated reboot");
	PART_GAUGE(UINT16, nodes_reboot_requested, partition_nodes_reboot_requested, "Nodes with Reboot Requested flag");
	PART_GAUGE(UINT16, nodes_resv, partition_nodes_resv, "Nodes with Reserved flag");
	PART_GAUGE(UINT16, nodes_unknown, partition_nodes_unknown, "Nodes in Unknown state");

	/* Partition totals */
	PART_GAUGE(UINT32, total_cpus, partition_cpus, "Partition total cpus");
	PART_GAUGE(UINT16, total_nodes, partition_nodes, "Partition total nodes");

#undef PART_GAUGE
	// clang-format on

	return SLURM_SUCCESS;
}
/*
 * Build a metric set from partition statistics: one gauge with the number of
 * partitions, then the per-partition gauges of every entry in stats->parts.
 *
 * IN stats - partition statistics
 * RET new metric set; release it with metrics_p_free_set()
 */
extern metric_set_t *metrics_p_parse_parts_metrics(partitions_stats_t *stats)
{
	metric_set_t *set;
	uint32_t total_parts;

	set = _metrics_new_set();
	total_parts = list_count(stats->parts);

	ADD_METRIC(set, UINT32, total_parts, partitions, "Total number of partitions", GAUGE);
	list_for_each_ro(stats->parts, _part_stats_to_metric, set);

	return set;
}
static int _ua_stats_to_metric(void *x, void *arg)
{
ua_stats_t *ua = x;
jobs_stats_t *js = ua->s;
metric_set_t *set = ((foreach_stats_parse_metric_t *) arg)->set;
char *key = ((foreach_stats_parse_metric_t *) arg)->str;
char *pfx = ((foreach_stats_parse_metric_t *) arg)->pfx;
// clang-format off
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->bootfail, pfx, jobs_bootfail, "Number of jobs in BootFail state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->cancelled, pfx, jobs_cancelled, "Number of jobs in Cancelled state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->completed, pfx, jobs_completed, "Number of jobs in Completed state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->completing, pfx, jobs_completing, "Number of jobs in Completing state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->configuring, pfx, jobs_configuring, "Number of jobs in Configuring state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT16, js->cpus_alloc, pfx, jobs_cpus_alloc, "Total number of Cpus allocated by jobs", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->deadline, pfx, jobs_deadline, "Number of jobs in Deadline state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->expediting, pfx, jobs_expediting, "Number of jobs in Expediting state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->failed, pfx, jobs_failed, "Number of jobs in Failed state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->fed_requeued, pfx, jobs_fed_requeued, "Number of jobs requeued in a federation", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->finished, pfx, jobs_finished, "Number of finished jobs", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->hold, pfx, jobs_hold, "Number of jobs in Hold state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->job_cnt, pfx, jobs, "Total number of jobs", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->memory_alloc, pfx, jobs_memory_alloc, "Total memory bytes allocated by jobs", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->node_failed, pfx, jobs_node_failed, "Number of jobs in Node Failed state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT16, js->nodes_alloc, pfx, jobs_nodes_alloc, "Total number of nodes allocated by jobs", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->oom, pfx, jobs_outofmemory, "Number of jobs in Out of Memory state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->pending, pfx, jobs_pending, "Number of jobs in Pending state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->powerup_node, pfx, jobs_powerup_node, "Number of jobs in PowerUp Node state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->preempted, pfx, jobs_preempted, "Number of jobs in Preempted state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->requeued, pfx, jobs_requeued, "Number of jobs in Requeued state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->resizing, pfx, jobs_resizing, "Number of jobs in Resizing state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->revoked, pfx, jobs_revoked, "Number of jobs revoked", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->running, pfx, jobs_running, "Number of jobs in Running state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->signaling, pfx, jobs_signaling, "Number of jobs being signaled", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->stageout, pfx, jobs_stageout, "Number of jobs in StageOut state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->started, pfx, jobs_started, "Number of started jobs", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->suspended, pfx, jobs_suspended, "Number of jobs in Suspended state", GAUGE, key, ua->name);
ADD_METRIC_KEYVAL_PFX(set, UINT32, js->timeout, pfx, jobs_timeout, "Number of jobs in Timeout state", GAUGE, key, ua->name);
// clang-format on
return SLURM_SUCCESS;
}
/*
 * Build a metric set from per-user and per-account statistics.
 *
 * The users list is walked first, then the accounts list. Every entry is
 * handed to _ua_stats_to_metric() along with the prefix and key strings
 * chosen for that pass.
 *
 * IN stats - statistics holding the users and accounts lists; dereferenced
 *	      without a NULL check
 * RET the set obtained from _metrics_new_set(), after both lists were walked
 */
extern metric_set_t *metrics_p_parse_ua_metrics(users_accts_stats_t *stats)
{
	metric_set_t *metrics = _metrics_new_set();
	foreach_stats_parse_metric_t user_args = {
		.set = metrics,
		.pfx = "user",
		.str = "username",
	};
	foreach_stats_parse_metric_t acct_args = {
		.set = metrics,
		.pfx = "account",
		.str = "account",
	};

	/* Both passes are given the same set; only prefix and key differ. */
	list_for_each_ro(stats->users, _ua_stats_to_metric, &user_args);
	list_for_each_ro(stats->accounts, _ua_stats_to_metric, &acct_args);

	return metrics;
}
/*
 * Build a metric set from a scheduling statistics snapshot.
 *
 * One ADD_METRIC() invocation is issued per exported value. Judging by the
 * call sites, the arguments are: target set, value type tag, source field,
 * metric name, help text and metric kind (the macro itself is defined
 * elsewhere - confirm there). Every entry in this function uses GAUGE.
 *
 * Values are read from three places: fields of s itself, fields of
 * s->diag_stats, and the schedule_exit[] / bf_exit[] arrays inside
 * s->diag_stats.
 *
 * NOTE(review): no NULL check is made here on s or s->diag_stats; verify
 * that callers always provide both.
 *
 * IN s - scheduling statistics to export
 * RET the set obtained from _metrics_new_set() after all ADD_METRIC() calls
 */
extern metric_set_t *metrics_p_parse_sched_metrics(scheduling_stats_t *s)
{
metric_set_t *set = _metrics_new_set();
// clang-format off
/* Agent counters and backfill means stored directly in s. */
ADD_METRIC(set, UINT32, s->agent_count, agent_cnt, "Number of agent threads", GAUGE);
ADD_METRIC(set, UINT32, s->agent_queue_size, agent_queue_size, "Outgoing RPC retry queue length", GAUGE);
ADD_METRIC(set, UINT32, s->agent_thread_count, agent_thread_cnt, "Total active agent-created threads", GAUGE);
ADD_METRIC(set, UINT32, s->bf_depth_mean, bf_depth_mean, "Mean backfill cycle depth", GAUGE);
ADD_METRIC(set, UINT32, s->bf_mean_cycle, bf_mean_cycle, "Mean backfill cycle time", GAUGE);
ADD_METRIC(set, UINT32, s->bf_mean_table_sz, bf_mean_table_sz, "Mean backfill table size", GAUGE);
ADD_METRIC(set, UINT32, s->bf_queue_len_mean, bf_queue_len_mean, "Mean backfill queue length", GAUGE);
ADD_METRIC(set, UINT32, s->bf_try_depth_mean, bf_try_depth_mean, "Mean depth attempts in backfill", GAUGE);
/* Backfill values read from s->diag_stats. */
ADD_METRIC(set, UINT32, s->diag_stats->backfilled_het_jobs, backfilled_het_jobs, "Heterogeneous components backfilled", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->backfilled_jobs, backfilled_jobs, "Total backfilled jobs since reset", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_active, bf_active, "Backfill scheduler active jobs", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_cycle_counter, bf_cycle_cnt, "Backfill cycle count", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_cycle_last, bf_cycle_last, "Last backfill cycle time", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_cycle_max, bf_cycle_max, "Max backfill cycle time", GAUGE);
/* bf_cycle_sum is the only diag_stats value exported with the UINT64 tag. */
ADD_METRIC(set, UINT64, s->diag_stats->bf_cycle_sum, bf_cycle_tot, "Sum of backfill cycle times", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_depth_sum, bf_depth_tot, "Sum of backfill job depths", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_depth_try_sum, bf_depth_try_tot, "Sum of backfill depth attempts", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_last_depth, bf_last_depth, "Last backfill depth", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_last_depth_try, bf_last_depth_try, "Last backfill depth attempts", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_queue_len, bf_queue_len, "Backfill queue length", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_queue_len_sum, bf_queue_len_tot, "Sum of backfill queue lengths", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_table_size, bf_table_size, "Backfill table size", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_table_size_sum, bf_table_size_tot, "Sum of backfill table sizes", GAUGE);
ADD_METRIC(set, TIMESTAMP, s->diag_stats->bf_when_last_cycle, bf_when_last_cycle, "Timestamp of last backfill cycle", GAUGE);
/* Job counts from s->diag_stats; metric names carry an "sdiag_" prefix. */
ADD_METRIC(set, UINT32, s->diag_stats->jobs_canceled, sdiag_jobs_canceled, "Jobs canceled since reset", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->jobs_completed, sdiag_jobs_completed, "Jobs completed since reset", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->jobs_failed, sdiag_jobs_failed, "Jobs failed since reset", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->jobs_pending, sdiag_jobs_pending, "Jobs pending at timestamp", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->jobs_running, sdiag_jobs_running, "Jobs running at timestamp", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->jobs_started, sdiag_jobs_started, "Jobs started since reset", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->jobs_submitted, sdiag_jobs_submitted, "Jobs submitted since reset", GAUGE);
ADD_METRIC(set, TIMESTAMP, s->diag_stats->job_states_ts, sdiag_job_states_ts, "Job states timestamp", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->last_backfilled_jobs, last_backfilled_jobs, "Backfilled jobs since last cycle", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->latency, sdiag_latency, "Measurement latency", GAUGE);
/* Main scheduling cycle values from s->diag_stats. */
ADD_METRIC(set, UINT32, s->diag_stats->schedule_cycle_counter, schedule_cycle_cnt, "Scheduling cycle count", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_cycle_depth, schedule_cycle_depth, "Processed jobs depth total", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_cycle_last, schedule_cycle_last, "Last scheduling cycle time", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_cycle_max, schedule_cycle_max, "Max scheduling cycle time", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_cycle_sum, schedule_cycle_tot, "Sum of scheduling cycle times", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_queue_len, schedule_queue_len, "Jobs pending queue length", GAUGE);
/* One metric per schedule_exit[] slot, indexed by SCHEDULE_EXIT_* constants. */
ADD_METRIC(set, UINT32, s->diag_stats->schedule_exit[SCHEDULE_EXIT_END], sched_exit_end , "End of job queue", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_exit[SCHEDULE_EXIT_MAX_DEPTH], sched_exit_max_depth, "Hit default_queue_depth", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_exit[SCHEDULE_EXIT_MAX_JOB_START], sched_exit_max_job_start, "Hit sched_max_job_start", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_exit[SCHEDULE_EXIT_LIC], sched_exit_lic, "Blocked on licenses", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_exit[SCHEDULE_EXIT_RPC_CNT], sched_exit_rpc_cnt, "Hit max_rpc_cnt", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->schedule_exit[SCHEDULE_EXIT_TIMEOUT], sched_exit_timeout, "Timeout (max_sched_time)", GAUGE);
/* One metric per bf_exit[] slot, indexed by BF_EXIT_* constants. */
ADD_METRIC(set, UINT32, s->diag_stats->bf_exit[BF_EXIT_END], bf_exit_end, "End of job queue", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_exit[BF_EXIT_MAX_JOB_START], bf_exit_max_job_start, "Hit bf_max_job_start", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_exit[BF_EXIT_MAX_JOB_TEST], bf_exit_max_job_test, "Hit bf_max_job_test", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_exit[BF_EXIT_STATE_CHANGED], bf_exit_state_changed, "System state changed", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_exit[BF_EXIT_TABLE_LIMIT], bf_exit_table_limit, "Hit table size limit (bf_node_space_size)", GAUGE);
ADD_METRIC(set, UINT32, s->diag_stats->bf_exit[BF_EXIT_TIMEOUT], bf_exit_timeout, "Timeout (bf_max_time)", GAUGE);
/* Remaining values stored directly in s. */
ADD_METRIC(set, UINT32, s->sched_mean_cycle, sched_mean_cycle, "Mean scheduling cycle time", GAUGE);
ADD_METRIC(set, UINT32, s->sched_mean_depth_cycle, sched_mean_depth_cycle, "Mean depth of scheduling cycles", GAUGE);
ADD_METRIC(set, UINT32, s->server_thread_count, server_thread_cnt, "Active slurmctld threads count", GAUGE);
ADD_METRIC(set, UINT32, s->slurmdbd_queue_size, slurmdbd_queue_size, "Queued messages to SlurmDBD", GAUGE);
/*
 * NOTE(review): described as a timestamp but exported with the UINT64 tag
 * rather than TIMESTAMP - confirm against the field's declared type.
 */
ADD_METRIC(set, UINT64, s->last_proc_req_start, last_proc_req_start, "Timestamp of last process request start", GAUGE);
ADD_METRIC(set, TIMESTAMP, s->time, sched_stats_timestamp, "Statistics snapshot timestamp", GAUGE);
// clang-format on
return set;
}