| /* |
| * Gather top-level ZFS pool and resilver/scan statistics and print using |
| * influxdb line protocol |
| * usage: [options] [pool_name] |
| * where options are: |
| * --execd, -e run in telegraf execd input plugin mode, [CR] on |
| * stdin causes a sample to be printed and waits for |
| * the next [CR] |
| * --no-histograms, -n don't print histogram data (reduces cardinality |
| * if you don't care about histograms) |
| * --sum-histogram-buckets, -s sum histogram bucket values |
| * --signed-int, -i print metric values as signed integers ("i") |
| * rather than unsigned ("u") |
| * --tags, -t KEY=VALUE[,...] tags to add to each measurement, in |
| * influxdb tag syntax |
| * |
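| * For example, `zpool_influxdb tank` (pool name illustrative) prints |
| * one sample of statistics for the pool "tank" to stdout and exits. |
| * |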
| * To integrate into telegraf use one of: |
| * 1. the `inputs.execd` plugin with the `--execd` option |
| * 2. the `inputs.exec` plugin to simply run with no options |
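| * |
| * A minimal telegraf.conf stanza for the execd mode might look like |
| * this sketch (the install path is illustrative; with signal = "STDIN" |
| * telegraf writes a newline to stdin at each collection interval): |
| * |
| * [[inputs.execd]] |
| *   command = ["/usr/local/libexec/zfs/zpool_influxdb", "--execd"] |
| *   signal = "STDIN" |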
| * |
| * NOTE: libzfs is an unstable interface. YMMV. |
| * |
| * The design goals of this software include: |
| * + be as lightweight as possible |
| * + reduce the number of external dependencies as far as possible, hence |
| * there is no dependency on a client library for managing the metric |
| * collection -- info is printed, KISS |
| * + broken pools or kernel bugs can cause this process to hang in an |
| * unkillable state. For this reason, it is best to keep the damage limited |
| * to a small process like zpool_influxdb rather than a larger collector. |
| * |
| * Copyright 2018-2020 Richard Elling |
| * |
| * This software is dual-licensed MIT and CDDL. |
| * |
| * The MIT License (MIT) |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| * |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License Version 1.0 (CDDL-1.0). |
| * You can obtain a copy of the license from the top-level file |
| * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. |
| * You may not use this file except in compliance with the license. |
| * |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * CDDL HEADER END |
| */ |
| #include <string.h> |
| #include <getopt.h> |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <time.h> |
| #include <inttypes.h> |
| /* libzfs_impl.h provides direct access to zpool_handle_t internals */ |
| #include <libzfs_impl.h> |
| |
| #define POOL_MEASUREMENT "zpool_stats" |
| #define SCAN_MEASUREMENT "zpool_scan_stats" |
| #define VDEV_MEASUREMENT "zpool_vdev_stats" |
| #define POOL_LATENCY_MEASUREMENT "zpool_latency" |
| #define POOL_QUEUE_MEASUREMENT "zpool_vdev_queue" |
| #define MIN_LAT_INDEX 10 /* minimum latency index 10 = 1024ns */ |
| #define POOL_IO_SIZE_MEASUREMENT "zpool_io_size" |
| #define MIN_SIZE_INDEX 9 /* minimum size index 9 = 512 bytes */ |
| |
| /* global options */ |
| int execd_mode = 0; |
| int no_histograms = 0; |
| int sum_histogram_buckets = 0; |
| char metric_data_type = 'u'; |
| uint64_t metric_value_mask = UINT64_MAX; |
| uint64_t timestamp = 0; |
| int complained_about_sync = 0; |
| char *tags = ""; |
| |
| typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *); |
| |
| /* |
| * influxdb line protocol rules for escaping are important because the |
| * zpool name can include characters that need to be escaped |
| * |
| * caller is responsible for freeing result |
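| * |
| * For example (name illustrative), a pool named "my pool" is emitted |
| * as "my\ pool" so the embedded space does not terminate the tag set. |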
| */ |
| static char * |
| escape_string(char *s) |
| { |
| char *c, *d; |
| /* worst case: every character is escaped, plus a terminating NUL */ |
| char *t = (char *)malloc(strlen(s) * 2 + 1); |
| if (t == NULL) { |
| fprintf(stderr, "error: cannot allocate memory\n"); |
| exit(1); |
| } |
| |
| for (c = s, d = t; *c != '\0'; c++, d++) { |
| switch (*c) { |
| case ' ': |
| case ',': |
| case '=': |
| case '\\': |
| *d++ = '\\'; |
| /* FALLTHROUGH */ |
| default: |
| *d = *c; |
| } |
| } |
| *d = '\0'; |
| return (t); |
| } |
| |
| /* |
| * print key=value where value is a uint64_t |
| */ |
| static void |
| print_kv(char *key, uint64_t value) |
| { |
| printf("%s=%llu%c", key, |
| (u_longlong_t)value & metric_value_mask, metric_data_type); |
| } |
| |
| /* |
| * print_scan_status() prints the details as often seen in the "zpool status" |
| * output. However, unlike the zpool command, which is intended for humans, |
| * this output is suitable for long-term tracking in influxdb. |
| * TODO: update to include issued scan data |
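| * |
| * An output line takes the form (single line; values illustrative): |
| *   zpool_scan_stats,function=scrub,name=tank,state=finished |
| *   errors=0u,examined=1234u,...,to_process=0u 1601158000000000000 |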
| */ |
| static int |
| print_scan_status(nvlist_t *nvroot, const char *pool_name) |
| { |
| uint_t c; |
| int64_t elapsed; |
| uint64_t examined, pass_exam, paused_time, paused_ts, rate; |
| uint64_t remaining_time; |
| pool_scan_stat_t *ps = NULL; |
| double pct_done; |
| char *state[DSS_NUM_STATES] = { |
| "none", "scanning", "finished", "canceled"}; |
| char *func; |
| |
| (void) nvlist_lookup_uint64_array(nvroot, |
| ZPOOL_CONFIG_SCAN_STATS, |
| (uint64_t **)&ps, &c); |
| |
| /* |
| * ignore if there are no stats |
| */ |
| if (ps == NULL) |
| return (0); |
| |
| /* |
| * return error if state is bogus |
| */ |
| if (ps->pss_state >= DSS_NUM_STATES || |
| ps->pss_func >= POOL_SCAN_FUNCS) { |
| if (complained_about_sync++ % 1000 == 0) { |
| fprintf(stderr, "error: cannot decode scan stats: " |
| "ZFS is out of sync with compiled zpool_influxdb\n"); |
| } |
| return (1); |
| } |
| |
| switch (ps->pss_func) { |
| case POOL_SCAN_NONE: |
| func = "none_requested"; |
| break; |
| case POOL_SCAN_SCRUB: |
| func = "scrub"; |
| break; |
| case POOL_SCAN_RESILVER: |
| func = "resilver"; |
| break; |
| #ifdef POOL_SCAN_REBUILD |
| case POOL_SCAN_REBUILD: |
| func = "rebuild"; |
| break; |
| #endif |
| default: |
| func = "scan"; |
| } |
| |
| /* overall progress */ |
| examined = ps->pss_examined ? ps->pss_examined : 1; |
| pct_done = 0.0; |
| if (ps->pss_to_examine > 0) |
| pct_done = 100.0 * examined / ps->pss_to_examine; |
| |
| #ifdef EZFS_SCRUB_PAUSED |
| paused_ts = ps->pss_pass_scrub_pause; |
| paused_time = ps->pss_pass_scrub_spent_paused; |
| #else |
| paused_ts = 0; |
| paused_time = 0; |
| #endif |
| |
| /* calculations for this pass */ |
| if (ps->pss_state == DSS_SCANNING) { |
| elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start - |
| (int64_t)paused_time; |
| elapsed = (elapsed > 0) ? elapsed : 1; |
| pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; |
| rate = pass_exam / elapsed; |
| rate = (rate > 0) ? rate : 1; |
| remaining_time = (ps->pss_to_examine - examined) / rate; |
| } else { |
| elapsed = |
| (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start - |
| (int64_t)paused_time; |
| elapsed = (elapsed > 0) ? elapsed : 1; |
| pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; |
| rate = pass_exam / elapsed; |
| remaining_time = 0; |
| } |
| rate = rate ? rate : 1; |
| |
| /* influxdb line protocol format: "tags metrics timestamp" */ |
| printf("%s%s,function=%s,name=%s,state=%s ", |
| SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]); |
| print_kv("end_ts", ps->pss_end_time); |
| print_kv(",errors", ps->pss_errors); |
| print_kv(",examined", examined); |
| print_kv(",issued", ps->pss_issued); |
| print_kv(",pass_examined", pass_exam); |
| print_kv(",pass_issued", ps->pss_pass_issued); |
| print_kv(",paused_ts", paused_ts); |
| print_kv(",paused_t", paused_time); |
| printf(",pct_done=%.2f", pct_done); |
| print_kv(",processed", ps->pss_processed); |
| print_kv(",rate", rate); |
| print_kv(",remaining_t", remaining_time); |
| print_kv(",start_ts", ps->pss_start_time); |
| print_kv(",to_examine", ps->pss_to_examine); |
| print_kv(",to_process", ps->pss_to_process); |
| printf(" %llu\n", (u_longlong_t)timestamp); |
| return (0); |
| } |
| |
| /* |
| * get a vdev name that corresponds to the top-level vdev names |
| * printed by `zpool status` |
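| * (e.g. "root", "root/mirror-0", or "root/mirror-0/disk-1"; names |
| * illustrative) |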
| */ |
| static char * |
| get_vdev_name(nvlist_t *nvroot, const char *parent_name) |
| { |
| static char vdev_name[256]; |
| char *vdev_type = NULL; |
| uint64_t vdev_id = 0; |
| |
| if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, |
| &vdev_type) != 0) { |
| vdev_type = "unknown"; |
| } |
| if (nvlist_lookup_uint64( |
| nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { |
| vdev_id = UINT64_MAX; |
| } |
| if (parent_name == NULL) { |
| (void) snprintf(vdev_name, sizeof (vdev_name), "%s", |
| vdev_type); |
| } else { |
| (void) snprintf(vdev_name, sizeof (vdev_name), |
| "%s/%s-%llu", |
| parent_name, vdev_type, (u_longlong_t)vdev_id); |
| } |
| return (vdev_name); |
| } |
| |
| /* |
| * get a string suitable for an influxdb tag that describes this vdev |
| * |
| * By default only the vdev hierarchical name is shown, separated by '/' |
| * If the vdev has an associated path, which is typical of leaf vdevs, |
| * then the path is added. |
| * It would be nice to have the devid instead of the path, but under |
| * Linux we cannot be sure a devid will exist and we'd rather have |
| * something than nothing, so we'll use path instead. |
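| * |
| * An illustrative result for a leaf vdev is |
| * "path=/dev/sda1,vdev=root/mirror-0/disk-0"; interior vdevs carry |
| * only the "vdev=" portion. |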
| */ |
| static char * |
| get_vdev_desc(nvlist_t *nvroot, const char *parent_name) |
| { |
| static char vdev_desc[2 * MAXPATHLEN]; |
| char *vdev_type = NULL; |
| uint64_t vdev_id = 0; |
| char vdev_value[MAXPATHLEN]; |
| char *vdev_path = NULL; |
| char *s, *t; |
| |
| if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type) != 0) { |
| vdev_type = "unknown"; |
| } |
| if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { |
| vdev_id = UINT64_MAX; |
| } |
| if (nvlist_lookup_string( |
| nvroot, ZPOOL_CONFIG_PATH, &vdev_path) != 0) { |
| vdev_path = NULL; |
| } |
| |
| if (parent_name == NULL) { |
| s = escape_string(vdev_type); |
| (void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s); |
| free(s); |
| } else { |
| s = escape_string((char *)parent_name); |
| t = escape_string(vdev_type); |
| (void) snprintf(vdev_value, sizeof (vdev_value), |
| "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id); |
| free(s); |
| free(t); |
| } |
| if (vdev_path == NULL) { |
| (void) snprintf(vdev_desc, sizeof (vdev_desc), "%s", |
| vdev_value); |
| } else { |
| s = escape_string(vdev_path); |
| (void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s", |
| s, vdev_value); |
| free(s); |
| } |
| return (vdev_desc); |
| } |
| |
| /* |
| * vdev summary stats are a combination of the data shown by |
| * `zpool status` and `zpool list -v` |
| */ |
| static int |
| print_summary_stats(nvlist_t *nvroot, const char *pool_name, |
| const char *parent_name) |
| { |
| uint_t c; |
| vdev_stat_t *vs; |
| char *vdev_desc = NULL; |
| vdev_desc = get_vdev_desc(nvroot, parent_name); |
| if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, |
| (uint64_t **)&vs, &c) != 0) { |
| return (1); |
| } |
| printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags, |
| pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state, |
| (vdev_aux_t)vs->vs_aux), vdev_desc); |
| print_kv("alloc", vs->vs_alloc); |
| print_kv(",free", vs->vs_space - vs->vs_alloc); |
| print_kv(",size", vs->vs_space); |
| print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]); |
| print_kv(",read_errors", vs->vs_read_errors); |
| print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]); |
| print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]); |
| print_kv(",write_errors", vs->vs_write_errors); |
| print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]); |
| print_kv(",checksum_errors", vs->vs_checksum_errors); |
| print_kv(",fragmentation", vs->vs_fragmentation); |
| printf(" %llu\n", (u_longlong_t)timestamp); |
| return (0); |
| } |
| |
| /* |
| * vdev latency stats are histograms stored as nvlist arrays of uint64. |
| * Latency stats include the ZIO scheduler classes plus lower-level |
| * vdev latencies. |
| * |
| * In many cases, the top-level "root" view obscures the underlying |
| * top-level vdev operations. For example, if a pool has a log, special, |
| * or cache device, then each can behave very differently. It is useful |
| * to see how each is responding. |
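| * |
| * Buckets are powers of two: the "le" tag for bucket b is its upper |
| * bound of (1 << b) nanoseconds, printed in seconds. Buckets below |
| * MIN_LAT_INDEX are folded into the first printed bucket to reduce |
| * cardinality, the last bucket is printed as le=+Inf, and with |
| * --sum-histogram-buckets the printed values accumulate across buckets. |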
| */ |
| static int |
| print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name, |
| const char *parent_name) |
| { |
| uint_t c, end = 0; |
| nvlist_t *nv_ex; |
| char *vdev_desc = NULL; |
| |
| /* short_names become part of the metric name and are influxdb-ready */ |
| struct lat_lookup { |
| char *name; |
| char *short_name; |
| uint64_t sum; |
| uint64_t *array; |
| }; |
| struct lat_lookup lat_type[] = { |
| {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0}, |
| {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0}, |
| {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0}, |
| {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0}, |
| {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0}, |
| {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0}, |
| {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0}, |
| #ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO |
| {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0}, |
| #endif |
| {NULL, NULL} |
| }; |
| |
| if (nvlist_lookup_nvlist(nvroot, |
| ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { |
| return (6); |
| } |
| |
| vdev_desc = get_vdev_desc(nvroot, parent_name); |
| |
| for (int i = 0; lat_type[i].name; i++) { |
| if (nvlist_lookup_uint64_array(nv_ex, |
| lat_type[i].name, &lat_type[i].array, &c) != 0) { |
| fprintf(stderr, "error: can't get %s\n", |
| lat_type[i].name); |
| return (3); |
| } |
| /* all of the arrays are the same size, so save the last index */ |
| end = c - 1; |
| } |
| |
| for (int bucket = 0; bucket <= end; bucket++) { |
| if (bucket < MIN_LAT_INDEX) { |
| /* don't print, but collect the sum */ |
| for (int i = 0; lat_type[i].name; i++) { |
| lat_type[i].sum += lat_type[i].array[bucket]; |
| } |
| continue; |
| } |
| if (bucket < end) { |
| printf("%s%s,le=%0.6f,name=%s,%s ", |
| POOL_LATENCY_MEASUREMENT, tags, |
| (float)(1ULL << bucket) * 1e-9, |
| pool_name, vdev_desc); |
| } else { |
| printf("%s%s,le=+Inf,name=%s,%s ", |
| POOL_LATENCY_MEASUREMENT, tags, pool_name, |
| vdev_desc); |
| } |
| for (int i = 0; lat_type[i].name; i++) { |
| if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) { |
| lat_type[i].sum += lat_type[i].array[bucket]; |
| } else { |
| lat_type[i].sum = lat_type[i].array[bucket]; |
| } |
| print_kv(lat_type[i].short_name, lat_type[i].sum); |
| if (lat_type[i + 1].name != NULL) { |
| printf(","); |
| } |
| } |
| printf(" %llu\n", (u_longlong_t)timestamp); |
| } |
| return (0); |
| } |
| |
| /* |
| * vdev request size stats are histograms stored as nvlist arrays of uint64. |
| * Request size stats include the ZIO scheduler classes plus lower-level |
| * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported. |
| * |
| * In many cases, the top-level "root" view obscures the underlying |
| * top-level vdev operations. For example, if a pool has a log, special, |
| * or cache device, then each can behave very differently. It is useful |
| * to see how each is responding. |
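| * |
| * As with the latency histograms, the "le" tag for bucket b is its |
| * upper bound of (1 << b) bytes, buckets below MIN_SIZE_INDEX are |
| * folded into the first printed bucket, and the last bucket is |
| * printed as le=+Inf. |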
| */ |
| static int |
| print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name, |
| const char *parent_name) |
| { |
| uint_t c, end = 0; |
| nvlist_t *nv_ex; |
| char *vdev_desc = NULL; |
| |
| /* short_names become the field name */ |
| struct size_lookup { |
| char *name; |
| char *short_name; |
| uint64_t sum; |
| uint64_t *array; |
| }; |
| struct size_lookup size_type[] = { |
| {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"}, |
| {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"}, |
| {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"}, |
| {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"}, |
| {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"}, |
| {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"}, |
| #ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO |
| {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"}, |
| {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"}, |
| #endif |
| {NULL, NULL} |
| }; |
| |
| if (nvlist_lookup_nvlist(nvroot, |
| ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { |
| return (6); |
| } |
| |
| vdev_desc = get_vdev_desc(nvroot, parent_name); |
| |
| for (int i = 0; size_type[i].name; i++) { |
| if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name, |
| &size_type[i].array, &c) != 0) { |
| fprintf(stderr, "error: can't get %s\n", |
| size_type[i].name); |
| return (3); |
| } |
| /* all of the arrays are the same size, so save the last index */ |
| end = c - 1; |
| } |
| |
| for (int bucket = 0; bucket <= end; bucket++) { |
| if (bucket < MIN_SIZE_INDEX) { |
| /* don't print, but collect the sum */ |
| for (int i = 0; size_type[i].name; i++) { |
| size_type[i].sum += size_type[i].array[bucket]; |
| } |
| continue; |
| } |
| |
| if (bucket < end) { |
| printf("%s%s,le=%llu,name=%s,%s ", |
| POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket, |
| pool_name, vdev_desc); |
| } else { |
| printf("%s%s,le=+Inf,name=%s,%s ", |
| POOL_IO_SIZE_MEASUREMENT, tags, pool_name, |
| vdev_desc); |
| } |
| for (int i = 0; size_type[i].name; i++) { |
| if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) { |
| size_type[i].sum += size_type[i].array[bucket]; |
| } else { |
| size_type[i].sum = size_type[i].array[bucket]; |
| } |
| print_kv(size_type[i].short_name, size_type[i].sum); |
| if (size_type[i + 1].name != NULL) { |
| printf(","); |
| } |
| } |
| printf(" %llu\n", (u_longlong_t)timestamp); |
| } |
| return (0); |
| } |
| |
| /* |
| * ZIO scheduler queue stats are stored as gauges. This is unfortunate |
| * because the values can change very rapidly and any point-in-time |
| * value will quickly be obsoleted. It is also not easy to downsample. |
| * Thus only the top-level queue stats might be beneficial... maybe. |
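| * |
| * An output line takes the form (single line; values illustrative): |
| *   zpool_vdev_queue,name=tank,vdev=root |
| *   sync_r_active=0u,sync_w_active=2u,... 1601158000000000000 |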
| */ |
| static int |
| print_queue_stats(nvlist_t *nvroot, const char *pool_name, |
| const char *parent_name) |
| { |
| nvlist_t *nv_ex; |
| uint64_t value; |
| |
| /* short_names are used for the field name */ |
| struct queue_lookup { |
| char *name; |
| char *short_name; |
| }; |
| struct queue_lookup queue_type[] = { |
| {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"}, |
| {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"}, |
| {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"}, |
| {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"}, |
| {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"}, |
| {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"}, |
| {NULL, NULL} |
| }; |
| |
| if (nvlist_lookup_nvlist(nvroot, |
| ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { |
| return (6); |
| } |
| |
| printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name, |
| get_vdev_desc(nvroot, parent_name)); |
| for (int i = 0; queue_type[i].name; i++) { |
| if (nvlist_lookup_uint64(nv_ex, |
| queue_type[i].name, &value) != 0) { |
| fprintf(stderr, "error: can't get %s\n", |
| queue_type[i].name); |
| return (3); |
| } |
| print_kv(queue_type[i].short_name, value); |
| if (queue_type[i + 1].name != NULL) { |
| printf(","); |
| } |
| } |
| printf(" %llu\n", (u_longlong_t)timestamp); |
| return (0); |
| } |
| |
| /* |
| * top-level vdev stats are at the pool level |
| */ |
| static int |
| print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name) |
| { |
| nvlist_t *nv_ex; |
| uint64_t value; |
| |
| /* short_names become part of the metric name */ |
| struct queue_lookup { |
| char *name; |
| char *short_name; |
| }; |
| struct queue_lookup queue_type[] = { |
| {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"}, |
| {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"}, |
| {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"}, |
| {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"}, |
| {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"}, |
| {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"}, |
| {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"}, |
| {NULL, NULL} |
| }; |
| |
| if (nvlist_lookup_nvlist(nvroot, |
| ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { |
| return (6); |
| } |
| |
| printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags, |
| pool_name); |
| for (int i = 0; queue_type[i].name; i++) { |
| if (nvlist_lookup_uint64(nv_ex, |
| queue_type[i].name, &value) != 0) { |
| fprintf(stderr, "error: can't get %s\n", |
| queue_type[i].name); |
| return (3); |
| } |
| if (i > 0) |
| printf(","); |
| print_kv(queue_type[i].short_name, value); |
| } |
| |
| printf(" %llu\n", (u_longlong_t)timestamp); |
| return (0); |
| } |
| |
| /* |
| * recursive stats printer |
| */ |
| static int |
| print_recursive_stats(stat_printer_f func, nvlist_t *nvroot, |
| const char *pool_name, const char *parent_name, int descend) |
| { |
| uint_t c, children; |
| nvlist_t **child; |
| char vdev_name[256]; |
| int err; |
| |
| err = func(nvroot, pool_name, parent_name); |
| if (err) |
| return (err); |
| |
| if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, |
| &child, &children) == 0) { |
| (void) strlcpy(vdev_name, get_vdev_name(nvroot, parent_name), |
| sizeof (vdev_name)); |
| |
| for (c = 0; c < children; c++) { |
| print_recursive_stats(func, child[c], pool_name, |
| vdev_name, descend); |
| } |
| } |
| return (0); |
| } |
| |
| /* |
| * call-back to print the stats from the pool config |
| * |
| * Note: if the pool is broken, this can hang indefinitely and perhaps in an |
| * unkillable state. |
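| * |
| * The timestamp is sampled once per pool, in nanoseconds, matching the |
| * influxdb line protocol's default timestamp precision. |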
| */ |
| static int |
| print_stats(zpool_handle_t *zhp, void *data) |
| { |
| uint_t c; |
| int err; |
| boolean_t missing; |
| nvlist_t *config, *nvroot; |
| vdev_stat_t *vs; |
| struct timespec tv; |
| char *pool_name; |
| |
| /* if not this pool return quickly */ |
| if (data && |
| strncmp(data, zhp->zpool_name, ZFS_MAX_DATASET_NAME_LEN) != 0) { |
| zpool_close(zhp); |
| return (0); |
| } |
| |
| if (zpool_refresh_stats(zhp, &missing) != 0) { |
| zpool_close(zhp); |
| return (1); |
| } |
| |
| config = zpool_get_config(zhp, NULL); |
| if (clock_gettime(CLOCK_REALTIME, &tv) != 0) |
| timestamp = (uint64_t)time(NULL) * 1000000000; |
| else |
| timestamp = |
| ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec; |
| |
| if (nvlist_lookup_nvlist( |
| config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { |
| zpool_close(zhp); |
| return (2); |
| } |
| if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, |
| (uint64_t **)&vs, &c) != 0) { |
| zpool_close(zhp); |
| return (3); |
| } |
| |
| pool_name = escape_string(zhp->zpool_name); |
| err = print_recursive_stats(print_summary_stats, nvroot, |
| pool_name, NULL, 1); |
| /* if any of these return an error, skip the rest */ |
| if (err == 0) |
| err = print_top_level_vdev_stats(nvroot, pool_name); |
| |
| if (no_histograms == 0) { |
| if (err == 0) |
| err = print_recursive_stats(print_vdev_latency_stats, nvroot, |
| pool_name, NULL, 1); |
| if (err == 0) |
| err = print_recursive_stats(print_vdev_size_stats, nvroot, |
| pool_name, NULL, 1); |
| if (err == 0) |
| err = print_recursive_stats(print_queue_stats, nvroot, |
| pool_name, NULL, 0); |
| } |
| if (err == 0) |
| err = print_scan_status(nvroot, pool_name); |
| |
| free(pool_name); |
| zpool_close(zhp); |
| return (err); |
| } |
| |
| static void |
| usage(char *name) |
| { |
| fprintf(stderr, "usage: %s [--execd][--no-histograms]" |
| "[--sum-histogram-buckets] [--signed-int] [poolname]\n", name); |
| exit(EXIT_FAILURE); |
| } |
| |
| int |
| main(int argc, char *argv[]) |
| { |
| int opt; |
| int ret = 8; |
| char *line = NULL; |
| size_t len = 0, tagslen = 0; |
| struct option long_options[] = { |
| {"execd", no_argument, NULL, 'e'}, |
| {"help", no_argument, NULL, 'h'}, |
| {"no-histograms", no_argument, NULL, 'n'}, |
| {"signed-int", no_argument, NULL, 'i'}, |
| {"sum-histogram-buckets", no_argument, NULL, 's'}, |
| {"tags", required_argument, NULL, 't'}, |
| {0, 0, 0, 0} |
| }; |
| while ((opt = getopt_long( |
| argc, argv, "ehinst:", long_options, NULL)) != -1) { |
| switch (opt) { |
| case 'e': |
| execd_mode = 1; |
| break; |
| case 'i': |
| metric_data_type = 'i'; |
| metric_value_mask = INT64_MAX; |
| break; |
| case 'n': |
| no_histograms = 1; |
| break; |
| case 's': |
| sum_histogram_buckets = 1; |
| break; |
| case 't': |
| tagslen = strlen(optarg) + 2; |
| tags = calloc(tagslen, 1); |
| if (tags == NULL) { |
| fprintf(stderr, |
| "error: cannot allocate memory " |
| "for tags\n"); |
| exit(1); |
| } |
| (void) snprintf(tags, tagslen, ",%s", optarg); |
| break; |
| default: |
| usage(argv[0]); |
| } |
| } |
| |
| libzfs_handle_t *g_zfs; |
| if ((g_zfs = libzfs_init()) == NULL) { |
| fprintf(stderr, |
| "error: cannot initialize libzfs. " |
| "Is the zfs module loaded or zrepl running?\n"); |
| exit(EXIT_FAILURE); |
| } |
| if (execd_mode == 0) { |
| ret = zpool_iter(g_zfs, print_stats, argv[optind]); |
| return (ret); |
| } |
| while (getline(&line, &len, stdin) != -1) { |
| ret = zpool_iter(g_zfs, print_stats, argv[optind]); |
| fflush(stdout); |
| } |
| return (ret); |
| } |