| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
| * Copyright (c) 2013, Delphix. All rights reserved. |
| * Copyright (c) 2013, Saso Kiselkov. All rights reserved. |
| * Copyright (c) 2013, Nexenta Systems, Inc. All rights reserved. |
| * Copyright (c) 2020, George Amanakis. All rights reserved. |
| */ |
| |
| #ifndef _SYS_ARC_IMPL_H |
| #define _SYS_ARC_IMPL_H |
| |
| #include <sys/arc.h> |
| #include <sys/multilist.h> |
| #include <sys/zio_crypt.h> |
| #include <sys/zthr.h> |
| #include <sys/aggsum.h> |
| #include <sys/wmsum.h> |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| /* |
| * Note that buffers can be in one of 6 states: |
| * ARC_anon - anonymous (discussed below) |
| * ARC_mru - recently used, currently cached |
| * ARC_mru_ghost - recently used, no longer in cache |
| * ARC_mfu - frequently used, currently cached |
| * ARC_mfu_ghost - frequently used, no longer in cache |
| * ARC_l2c_only - exists in L2ARC but not other states |
| * When there are no active references to the buffers, they are |
| * linked onto a list in one of these arc states. These are |
| * the only buffers that can be evicted or deleted. Within each |
| * state there are multiple lists, one for meta-data and one for |
| * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, |
| * etc.) is tracked separately so that it can be managed more |
| * explicitly: favored over data, limited explicitly. |
| * |
| * Anonymous buffers are buffers that are not associated with |
| * a DVA. These are buffers that hold dirty block copies |
| * before they are written to stable storage. By definition, |
| * they are "ref'd" and are considered part of arc_mru |
| * that cannot be freed. Generally, they will acquire a DVA |
| * as they are written and migrate onto the arc_mru list. |
| * |
| * The ARC_l2c_only state is for buffers that are in the second |
| * level ARC but no longer in any of the ARC_m* lists. The second |
| * level ARC itself may also contain buffers that are in any of |
| * the ARC_m* states - meaning that a buffer can exist in two |
| * places. The reason for the ARC_l2c_only state is to keep the |
| * buffer header in the hash table, so that reads that hit the |
| * second level ARC benefit from these fast lookups. |
| */ |
| |
| typedef struct arc_state { /* bookkeeping for one ARC state */ |
| /* |
| * lists of evictable buffers; one multilist per buffer content |
| * type (ARC_BUFC_NUMTYPES entries) |
| */ |
| multilist_t arcs_list[ARC_BUFC_NUMTYPES]; |
| /* |
| * state type tag (arc_state_type_t); supports the "dbufs" kstat |
| */ |
| arc_state_type_t arcs_state; |
| /* |
| * total amount of evictable data in this state, accounted |
| * separately per buffer content type; the array is declared |
| * cacheline aligned |
| */ |
| zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES] ____cacheline_aligned; |
| /* |
| * total amount of data in this state; this includes: evictable, |
| * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA. |
| */ |
| zfs_refcount_t arcs_size; |
| } arc_state_t; |
| |
| typedef struct arc_callback arc_callback_t; |
| |
| struct arc_callback { /* one read-completion callback record */ |
| void *acb_private; /* opaque pointer; presumably passed to acb_done - confirm */ |
| arc_read_done_func_t *acb_done; /* read-done callback function */ |
| arc_buf_t *acb_buf; /* arc_buf_t associated with this callback */ |
| boolean_t acb_encrypted; /* NOTE(review): the four boolean_t members look like */ |
| boolean_t acb_compressed; /* per-request options; their exact semantics are not */ |
| boolean_t acb_noauth; /* visible in this header - confirm against the code */ |
| boolean_t acb_nobuf; /* that fills them in */ |
| zbookmark_phys_t acb_zb; /* bookmark stored by value */ |
| zio_t *acb_zio_dummy; |
| zio_t *acb_zio_head; |
| arc_callback_t *acb_next; /* link to another callback record (self-referential) */ |
| }; |
| |
| typedef struct arc_write_callback arc_write_callback_t; |
| |
| struct arc_write_callback { /* callbacks and buffer for one ARC write */ |
| void *awcb_private; /* opaque pointer; presumably passed to the callbacks - confirm */ |
| arc_write_done_func_t *awcb_ready; /* the four function pointers share one type; */ |
| arc_write_done_func_t *awcb_children_ready; /* judging by name each belongs to a */ |
| arc_write_done_func_t *awcb_physdone; /* different stage of the write - TODO */ |
| arc_write_done_func_t *awcb_done; /* confirm against the write path */ |
| arc_buf_t *awcb_buf; /* arc_buf_t associated with the write */ |
| }; |
| |
| /* |
| * ARC buffers are separated into multiple structs as a memory saving measure: |
| * - Common fields struct, always defined, and embedded within it: |
| * - L2-only fields, always allocated but undefined when not in L2ARC |
| * - L1-only fields, only allocated when in L1ARC |
| * |
| * Buffer in L1 Buffer only in L2 |
| * +------------------------+ +------------------------+ |
| * | arc_buf_hdr_t | | arc_buf_hdr_t | |
| * | | | | |
| * | | | | |
| * | | | | |
| * +------------------------+ +------------------------+ |
| * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t | |
| * | (undefined if L1-only) | | | |
| * +------------------------+ +------------------------+ |
| * | l1arc_buf_hdr_t | |
| * | | |
| * | | |
| * | | |
| * | | |
| * +------------------------+ |
| * |
| * Because it's possible for the L2ARC to become extremely large, we can wind |
| * up eating a lot of memory in L2ARC buffer headers, so the size of a header |
| * is minimized by only allocating the fields necessary for an L1-cached buffer |
| * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and |
| * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple |
| * words in pointers. arc_hdr_realloc() is used to switch a header between |
| * these two allocation states. |
| */ |
| typedef struct l1arc_buf_hdr { /* L1-only fields of arc_buf_hdr_t (member b_l1hdr) */ |
| kmutex_t b_freeze_lock; |
| zio_cksum_t *b_freeze_cksum; |
| |
| /* for waiting on reads to complete */ |
| kcondvar_t b_cv; |
| uint8_t b_byteswap; |
| |
| /* protected by arc state mutex */ |
| arc_state_t *b_state; /* arc_state_t this header refers to */ |
| multilist_node_t b_arc_node; /* multilist linkage (cf. arc_state_t arcs_list) */ |
| |
| /* protected by hash lock */ |
| clock_t b_arc_access; |
| uint32_t b_mru_hits; /* the four *_hits members are counters which, */ |
| uint32_t b_mru_ghost_hits; /* judging by name, are kept per ARC state - */ |
| uint32_t b_mfu_hits; /* TODO confirm where they are incremented */ |
| uint32_t b_mfu_ghost_hits; |
| uint32_t b_bufcnt; /* presumably the number of arc_buf_t's reachable from b_buf - confirm */ |
| arc_buf_t *b_buf; |
| |
| /* self protecting */ |
| zfs_refcount_t b_refcnt; |
| |
| arc_callback_t *b_acb; /* callback record(s); see struct arc_callback */ |
| abd_t *b_pabd; /* cached data, possibly compressed (see arcstat_compressed_size) */ |
| } l1arc_buf_hdr_t; |
| |
| typedef enum l2arc_dev_hdr_flags_t { /* bit flags stored in dh_flags */ |
| L2ARC_DEV_HDR_EVICT_FIRST = (1 << 0) /* bit 0: mirror of l2ad_first */ |
| } l2arc_dev_hdr_flags_t; |
| |
| /* |
| * Pointer used in persistent L2ARC (for pointing to log blocks). |
| * It appears both in the device header (dh_start_lbps) and in every log |
| * block (lb_prev_lbp). |
| */ |
| typedef struct l2arc_log_blkptr { |
| /* |
| * Offset of log block within the device, in bytes |
| */ |
| uint64_t lbp_daddr; |
| /* |
| * Aligned payload size (in bytes) of the log block |
| */ |
| uint64_t lbp_payload_asize; |
| /* |
| * Offset in bytes of the first buffer in the payload |
| */ |
| uint64_t lbp_payload_start; |
| /* |
| * lbp_prop has the following format: |
| * * logical size (in bytes) |
| * * aligned (after compression) size (in bytes) |
| * * compression algorithm (we always LZ4-compress l2arc logs) |
| * * checksum algorithm (used for lbp_cksum) |
| * The packed fields are accessed with the L2BLK_GET_* and |
| * L2BLK_SET_* macros. |
| */ |
| uint64_t lbp_prop; |
| zio_cksum_t lbp_cksum; /* checksum of log */ |
| } l2arc_log_blkptr_t; |
| |
| /* |
| * The persistent L2ARC device header. |
| * Byte order of magic determines whether 64-bit bswap of fields is necessary. |
| * The structure is asserted below to be exactly SPA_MINBLOCKSIZE bytes, so |
| * any member added here must be taken out of dh_pad. |
| */ |
| typedef struct l2arc_dev_hdr_phys { |
| uint64_t dh_magic; /* L2ARC_DEV_HDR_MAGIC */ |
| uint64_t dh_version; /* Persistent L2ARC version */ |
| |
| /* |
| * Global L2ARC device state and metadata. |
| */ |
| uint64_t dh_spa_guid; |
| uint64_t dh_vdev_guid; |
| uint64_t dh_log_entries; /* mirror of l2ad_log_entries */ |
| uint64_t dh_evict; /* evicted offset in bytes */ |
| uint64_t dh_flags; /* l2arc_dev_hdr_flags_t */ |
| /* |
| * Used in zdb.c for determining if a log block is valid, in the same |
| * way that l2arc_rebuild() does. |
| */ |
| uint64_t dh_start; /* mirror of l2ad_start */ |
| uint64_t dh_end; /* mirror of l2ad_end */ |
| /* |
| * Start of log block chain. [0] -> newest log, [1] -> one older (used |
| * for initiating prefetch). |
| */ |
| l2arc_log_blkptr_t dh_start_lbps[2]; |
| /* |
| * Aligned size of all log blocks as accounted by vdev_space_update(). |
| */ |
| uint64_t dh_lb_asize; /* mirror of l2ad_lb_asize */ |
| uint64_t dh_lb_count; /* mirror of l2ad_lb_count */ |
| /* |
| * Mirrors of vdev_trim_action_time and vdev_trim_state, used to |
| * display when the cache device was fully trimmed for the last |
| * time. |
| */ |
| uint64_t dh_trim_action_time; |
| uint64_t dh_trim_state; |
| const uint64_t dh_pad[30]; /* pad to 512 bytes */ |
| zio_eck_t dh_tail; /* trailing zio_eck_t; counted in the asserted size */ |
| } l2arc_dev_hdr_phys_t; |
| CTASSERT_GLOBAL(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE); |
| |
| /* |
| * A single ARC buffer header entry in a l2arc_log_blk_phys_t. |
| */ |
| typedef struct l2arc_log_ent_phys { |
| dva_t le_dva; /* dva of buffer */ |
| uint64_t le_birth; /* birth txg of buffer */ |
| /* |
| * le_prop has the following format: |
| * * logical size (in bytes) |
| * * physical (compressed) size (in bytes) |
| * * compression algorithm |
| * * object type (used to restore arc_buf_contents_t) |
| * * protected status (used for encryption) |
| * * prefetch status (used in l2arc_read_done()) |
| * The packed fields are accessed with the L2BLK_GET_* and |
| * L2BLK_SET_* macros. |
| */ |
| uint64_t le_prop; |
| uint64_t le_daddr; /* buf location on l2dev */ |
| uint64_t le_complevel; /* presumably the compression level (cf. b_complevel) - confirm */ |
| /* |
| * We pad the size of each entry to a power of 2 so that the size of |
| * l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT, |
| * because of the L2BLK_SET_*SIZE macros. |
| */ |
| const uint64_t le_pad[2]; /* pad to 64 bytes */ |
| } l2arc_log_ent_phys_t; |
| |
| #define L2ARC_LOG_BLK_MAX_ENTRIES (1022) |
| |
| /* |
| * A log block of up to L2ARC_LOG_BLK_MAX_ENTRIES (1022) ARC buffer log |
| * entries, chained into the persistent L2ARC metadata linked list. Byte |
| * order of magic determines whether 64-bit bswap of fields is necessary. |
| */ |
| typedef struct l2arc_log_blk_phys { |
| uint64_t lb_magic; /* L2ARC_LOG_BLK_MAGIC */ |
| /* |
| * There are 2 chains (headed by dh_start_lbps[2]), and this field |
| * points back to the previous block in this chain. We alternate |
| * which chain we append to, so they are time-wise and offset-wise |
| * interleaved, but that is an optimization rather than for |
| * correctness. |
| */ |
| l2arc_log_blkptr_t lb_prev_lbp; /* pointer to prev log block */ |
| /* |
| * Pad header section to 128 bytes |
| */ |
| uint64_t lb_pad[7]; |
| /* Payload: array sized for the maximum number of entries */ |
| l2arc_log_ent_phys_t lb_entries[L2ARC_LOG_BLK_MAX_ENTRIES]; |
| } l2arc_log_blk_phys_t; /* 64K total */ |
| |
| /* |
| * The size of l2arc_log_blk_phys_t has to be power-of-2 aligned with |
| * SPA_MINBLOCKSHIFT because of L2BLK_SET_*SIZE macros. |
| * It is additionally required to lie between SPA_MINBLOCKSIZE and |
| * SPA_MAXBLOCKSIZE (inclusive). All three conditions are checked at |
| * compile time. |
| */ |
| CTASSERT_GLOBAL(IS_P2ALIGNED(sizeof (l2arc_log_blk_phys_t), |
| 1ULL << SPA_MINBLOCKSHIFT)); |
| CTASSERT_GLOBAL(sizeof (l2arc_log_blk_phys_t) >= SPA_MINBLOCKSIZE); |
| CTASSERT_GLOBAL(sizeof (l2arc_log_blk_phys_t) <= SPA_MAXBLOCKSIZE); |
| |
| /* |
| * These structures hold in-flight abd buffers for log blocks as they're being |
| * written to the L2ARC device. |
| */ |
| typedef struct l2arc_lb_abd_buf { |
| abd_t *abd; /* the in-flight abd buffer */ |
| list_node_t node; /* list linkage (cf. l2wcb_abd_list, the in-flight list) */ |
| } l2arc_lb_abd_buf_t; |
| |
| /* |
| * These structures hold pointers to log blocks present on the L2ARC device. |
| */ |
| typedef struct l2arc_lb_ptr_buf { |
| l2arc_log_blkptr_t *lb_ptr; /* pointer to the log block pointer */ |
| list_node_t node; /* list linkage (cf. l2ad_lbptr_list) */ |
| } l2arc_lb_ptr_buf_t; |
| |
| /* |
| * Macros for getting and setting the fields packed into le_prop and |
| * lbp_prop. Reading the BF64_* arguments as (value, starting bit, width), |
| * the layout is: LSIZE at bit 0 (SPA_LSIZEBITS), PSIZE at bit 16 |
| * (SPA_PSIZEBITS), COMPRESS at bit 32 (SPA_COMPRESSBITS), PREFETCH at |
| * bit 39 (1 bit), CHECKSUM at bit 40 (8 bits), TYPE at bit 48 (8 bits), |
| * PROTECTED at bit 56 (1 bit) and STATE at bit 57 (4 bits). |
| * NOTE(review): the BF64_* argument order is assumed here - confirm |
| * against their definitions before relying on this summary. |
| */ |
| #define L2BLK_GET_LSIZE(field) \ |
| BF64_GET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1) |
| #define L2BLK_SET_LSIZE(field, x) \ |
| BF64_SET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) |
| #define L2BLK_GET_PSIZE(field) \ |
| BF64_GET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1) |
| #define L2BLK_SET_PSIZE(field, x) \ |
| BF64_SET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) |
| #define L2BLK_GET_COMPRESS(field) \ |
| BF64_GET((field), 32, SPA_COMPRESSBITS) |
| #define L2BLK_SET_COMPRESS(field, x) \ |
| BF64_SET((field), 32, SPA_COMPRESSBITS, x) |
| #define L2BLK_GET_PREFETCH(field) BF64_GET((field), 39, 1) |
| #define L2BLK_SET_PREFETCH(field, x) BF64_SET((field), 39, 1, x) |
| #define L2BLK_GET_CHECKSUM(field) BF64_GET((field), 40, 8) |
| #define L2BLK_SET_CHECKSUM(field, x) BF64_SET((field), 40, 8, x) |
| #define L2BLK_GET_TYPE(field) BF64_GET((field), 48, 8) |
| #define L2BLK_SET_TYPE(field, x) BF64_SET((field), 48, 8, x) |
| #define L2BLK_GET_PROTECTED(field) BF64_GET((field), 56, 1) |
| #define L2BLK_SET_PROTECTED(field, x) BF64_SET((field), 56, 1, x) |
| #define L2BLK_GET_STATE(field) BF64_GET((field), 57, 4) |
| #define L2BLK_SET_STATE(field, x) BF64_SET((field), 57, 4, x) |
| |
| /* |
| * Exchange the values of the two pointer lvalues x and y, going through a |
| * void * temporary. Each argument is evaluated twice, so neither may have |
| * side effects. The arguments are parenthesized at every use, and the |
| * temporary has a macro-specific name so that a caller variable called |
| * "tmp" can be passed safely. |
| */ |
| #define PTR_SWAP(x, y) \ |
| do { \ |
| void *ptr_swap_tmp_ = (x);\ |
| (x) = (y); \ |
| (y) = ptr_swap_tmp_; \ |
| _NOTE(CONSTCOND)\ |
| } while (0) |
| |
| #define L2ARC_DEV_HDR_MAGIC 0x5a46534341434845LLU /* ASCII: "ZFSCACHE"; value of dh_magic */ |
| #define L2ARC_LOG_BLK_MAGIC 0x4c4f47424c4b4844LLU /* ASCII: "LOGBLKHD"; value of lb_magic */ |
| |
| /* |
| * L2ARC Internals |
| * |
| * In-memory state of one L2ARC device. Several members are mirrored into |
| * the persistent device header (see the "mirror of" notes in |
| * l2arc_dev_hdr_phys_t). |
| */ |
| typedef struct l2arc_dev { |
| vdev_t *l2ad_vdev; /* vdev */ |
| spa_t *l2ad_spa; /* spa */ |
| uint64_t l2ad_hand; /* next write location */ |
| uint64_t l2ad_start; /* first addr on device */ |
| uint64_t l2ad_end; /* last addr on device */ |
| boolean_t l2ad_first; /* first sweep through */ |
| boolean_t l2ad_writing; /* currently writing */ |
| kmutex_t l2ad_mtx; /* lock for buffer list */ |
| list_t l2ad_buflist; /* buffer list */ |
| list_node_t l2ad_node; /* device list node */ |
| zfs_refcount_t l2ad_alloc; /* allocated bytes */ |
| /* |
| * Persistence-related stuff |
| * |
| * Note that l2ad_log_blk is embedded by value, not pointed to; |
| * l2arc_log_blk_phys_t is annotated as 64K in total. |
| */ |
| l2arc_dev_hdr_phys_t *l2ad_dev_hdr; /* persistent device header */ |
| uint64_t l2ad_dev_hdr_asize; /* aligned hdr size */ |
| l2arc_log_blk_phys_t l2ad_log_blk; /* currently open log block */ |
| int l2ad_log_ent_idx; /* index into cur log blk */ |
| /* Number of bytes in current log block's payload */ |
| uint64_t l2ad_log_blk_payload_asize; |
| /* |
| * Offset (in bytes) of the first buffer in current log block's |
| * payload. |
| */ |
| uint64_t l2ad_log_blk_payload_start; |
| /* Flag indicating whether a rebuild is scheduled or is going on */ |
| boolean_t l2ad_rebuild; |
| boolean_t l2ad_rebuild_cancel; /* presumably a request to stop the rebuild - confirm */ |
| boolean_t l2ad_rebuild_began; /* presumably set once the rebuild has started - confirm */ |
| uint64_t l2ad_log_entries; /* entries per log blk */ |
| uint64_t l2ad_evict; /* evicted offset in bytes */ |
| /* List of pointers to log blocks present in the L2ARC device */ |
| list_t l2ad_lbptr_list; |
| /* |
| * Aligned size of all log blocks as accounted by vdev_space_update(). |
| */ |
| zfs_refcount_t l2ad_lb_asize; |
| /* |
| * Number of log blocks present on the device. |
| */ |
| zfs_refcount_t l2ad_lb_count; |
| boolean_t l2ad_trim_all; /* TRIM whole device */ |
| } l2arc_dev_t; |
| |
| /* |
| * Encrypted blocks will need to be stored encrypted on the L2ARC |
| * disk as they appear in the main pool. In order for this to work we |
| * need to pass around the encryption parameters so they can be used |
| * to write data to the L2ARC. This struct is only defined in the |
| * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED |
| * flag set. It is the type of the b_crypt_hdr member of struct |
| * arc_buf_hdr. |
| */ |
| typedef struct arc_buf_hdr_crypt { |
| abd_t *b_rabd; /* raw encrypted data */ |
| dmu_object_type_t b_ot; /* object type */ |
| uint32_t b_ebufcnt; /* count of encrypted buffers */ |
| |
| /* dsobj for looking up encryption key for l2arc encryption */ |
| uint64_t b_dsobj; |
| |
| /* encryption parameters: salt and IV, sized by the ZIO_DATA_* constants */ |
| uint8_t b_salt[ZIO_DATA_SALT_LEN]; |
| uint8_t b_iv[ZIO_DATA_IV_LEN]; |
| |
| /* |
| * Technically this could be removed since we will always be able to |
| * get the mac from the bp when we need it. However, it is inconvenient |
| * for callers of arc code to have to pass a bp in all the time. This |
| * also allows us to assert that L2ARC data is properly encrypted to |
| * match the data in the main storage pool. |
| */ |
| uint8_t b_mac[ZIO_DATA_MAC_LEN]; |
| } arc_buf_hdr_crypt_t; |
| |
| typedef struct l2arc_buf_hdr { /* L2ARC fields of arc_buf_hdr_t (member b_l2hdr) */ |
| /* protected by arc_buf_hdr mutex */ |
| l2arc_dev_t *b_dev; /* L2ARC device */ |
| uint64_t b_daddr; /* disk address, offset byte */ |
| uint32_t b_hits; /* hit counter; NOTE(review): presumably L2ARC hits - confirm */ |
| arc_state_type_t b_arcs_state; /* an ARC state type; which moment it records is not shown here */ |
| list_node_t b_l2node; /* list linkage; presumably on l2ad_buflist - confirm */ |
| } l2arc_buf_hdr_t; |
| |
| typedef struct l2arc_write_callback { /* context for one L2ARC write */ |
| l2arc_dev_t *l2wcb_dev; /* device info */ |
| arc_buf_hdr_t *l2wcb_head; /* head of write buflist */ |
| /* in-flight list of log blocks (cf. l2arc_lb_abd_buf_t) */ |
| list_t l2wcb_abd_list; |
| } l2arc_write_callback_t; |
| |
| struct arc_buf_hdr { /* common ARC buffer header, followed by the L2 and L1 sub-headers */ |
| /* protected by hash lock */ |
| dva_t b_dva; |
| uint64_t b_birth; |
| |
| arc_buf_contents_t b_type; |
| uint8_t b_complevel; |
| uint8_t b_reserved1; /* used for 4 byte alignment */ |
| uint16_t b_reserved2; /* used for 4 byte alignment */ |
| arc_buf_hdr_t *b_hash_next; /* presumably the next header on the same hash chain - confirm */ |
| arc_flags_t b_flags; |
| |
| /* |
| * This field stores the size of the data buffer after |
| * compression, and is set in the arc's zio completion handlers. |
| * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes). |
| * |
| * While the block pointers can store up to 32MB in their psize |
| * field, we can only store up to 32MB minus 512B. This is due |
| * to the bp using a bias of 1, whereas we use a bias of 0 (i.e. |
| * a field of zeros represents 512B in the bp). We can't use a |
| * bias of 1 since we need to reserve a psize of zero, here, to |
| * represent holes and embedded blocks. |
| * |
| * This isn't a problem in practice, since the maximum size of a |
| * buffer is limited to 16MB, so we never need to store 32MB in |
| * this field. Even in the upstream illumos code base, the |
| * maximum size of a buffer is limited to 16MB. |
| */ |
| uint16_t b_psize; |
| |
| /* |
| * This field stores the size of the data buffer before |
| * compression, and cannot change once set. It is in units |
| * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes) |
| */ |
| uint16_t b_lsize; /* immutable */ |
| uint64_t b_spa; /* immutable */ |
| |
| /* L2ARC fields. Undefined when not in L2ARC. */ |
| l2arc_buf_hdr_t b_l2hdr; |
| /* L1ARC fields. Undefined when in l2arc_only state */ |
| l1arc_buf_hdr_t b_l1hdr; |
| /* |
| * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED |
| * is set and the L1 header exists. |
| * |
| * NOTE(review): the optional parts (b_l1hdr, b_crypt_hdr) come |
| * last; this looks deliberate so that shorter allocations can |
| * leave them out - confirm before reordering any member. |
| */ |
| arc_buf_hdr_crypt_t b_crypt_hdr; |
| }; |
| |
| typedef struct arc_stats { /* ARC statistics, one kstat_named_t per counter */ |
| kstat_named_t arcstat_hits; |
| kstat_named_t arcstat_misses; |
| kstat_named_t arcstat_demand_data_hits; |
| kstat_named_t arcstat_demand_data_misses; |
| kstat_named_t arcstat_demand_metadata_hits; |
| kstat_named_t arcstat_demand_metadata_misses; |
| kstat_named_t arcstat_prefetch_data_hits; |
| kstat_named_t arcstat_prefetch_data_misses; |
| kstat_named_t arcstat_prefetch_metadata_hits; |
| kstat_named_t arcstat_prefetch_metadata_misses; |
| kstat_named_t arcstat_mru_hits; |
| kstat_named_t arcstat_mru_ghost_hits; |
| kstat_named_t arcstat_mfu_hits; |
| kstat_named_t arcstat_mfu_ghost_hits; |
| kstat_named_t arcstat_deleted; |
| /* |
| * Number of buffers that could not be evicted because the hash lock |
| * was held by another thread. The lock may not necessarily be held |
| * by something using the same buffer, since hash locks are shared |
| * by multiple buffers. |
| */ |
| kstat_named_t arcstat_mutex_miss; |
| /* |
| * Number of buffers skipped when updating the access state due to the |
| * header having already been released after acquiring the hash lock. |
| */ |
| kstat_named_t arcstat_access_skip; |
| /* |
| * Number of buffers skipped because they have I/O in progress, are |
| * indirect prefetch buffers that have not lived long enough, or are |
| * not from the spa we're trying to evict from. |
| */ |
| kstat_named_t arcstat_evict_skip; |
| /* |
| * Number of times arc_evict_state() was unable to evict enough |
| * buffers to reach its target amount. |
| */ |
| kstat_named_t arcstat_evict_not_enough; |
| kstat_named_t arcstat_evict_l2_cached; |
| kstat_named_t arcstat_evict_l2_eligible; |
| kstat_named_t arcstat_evict_l2_eligible_mfu; |
| kstat_named_t arcstat_evict_l2_eligible_mru; |
| kstat_named_t arcstat_evict_l2_ineligible; |
| kstat_named_t arcstat_evict_l2_skip; |
| kstat_named_t arcstat_hash_elements; |
| kstat_named_t arcstat_hash_elements_max; |
| kstat_named_t arcstat_hash_collisions; |
| kstat_named_t arcstat_hash_chains; |
| kstat_named_t arcstat_hash_chain_max; |
| kstat_named_t arcstat_p; |
| kstat_named_t arcstat_c; |
| kstat_named_t arcstat_c_min; |
| kstat_named_t arcstat_c_max; |
| kstat_named_t arcstat_size; |
| /* |
| * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd. |
| * Note that the compressed bytes may match the uncompressed bytes |
| * if the block is either not compressed or compressed arc is disabled. |
| */ |
| kstat_named_t arcstat_compressed_size; |
| /* |
| * Uncompressed size of the data stored in b_pabd. If compressed |
| * arc is disabled then this value will be identical to the stat |
| * above. |
| */ |
| kstat_named_t arcstat_uncompressed_size; |
| /* |
| * Number of bytes stored in all the arc_buf_t's. This is classified |
| * as "overhead" since this data is typically short-lived and will |
| * be evicted from the arc when it becomes unreferenced unless the |
| * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level |
| * values have been set (see comment in dbuf.c for more information). |
| */ |
| kstat_named_t arcstat_overhead_size; |
| /* |
| * Number of bytes consumed by internal ARC structures necessary |
| * for tracking purposes; these structures are not actually |
| * backed by ARC buffers. This includes arc_buf_hdr_t structures |
| * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only |
| * caches), and arc_buf_t structures (allocated via arc_buf_t |
| * cache). |
| */ |
| kstat_named_t arcstat_hdr_size; |
| /* |
| * Number of bytes consumed by ARC buffers of type equal to |
| * ARC_BUFC_DATA. This is generally consumed by buffers backing |
| * on disk user data (e.g. plain file contents). |
| */ |
| kstat_named_t arcstat_data_size; |
| /* |
| * Number of bytes consumed by ARC buffers of type equal to |
| * ARC_BUFC_METADATA. This is generally consumed by buffers |
| * backing on disk data that is used for internal ZFS |
| * structures (e.g. ZAP, dnode, indirect blocks, etc). |
| */ |
| kstat_named_t arcstat_metadata_size; |
| /* |
| * Number of bytes consumed by dmu_buf_impl_t objects. |
| */ |
| kstat_named_t arcstat_dbuf_size; |
| /* |
| * Number of bytes consumed by dnode_t objects. |
| */ |
| kstat_named_t arcstat_dnode_size; |
| /* |
| * Number of bytes consumed by bonus buffers. |
| */ |
| kstat_named_t arcstat_bonus_size; |
| #if defined(COMPAT_FREEBSD11) |
| /* |
| * Sum of the previous three counters, provided for compatibility. |
| */ |
| kstat_named_t arcstat_other_size; |
| #endif |
| |
| /* |
| * Total number of bytes consumed by ARC buffers residing in the |
| * arc_anon state. This includes *all* buffers in the arc_anon |
| * state; e.g. data, metadata, evictable, and unevictable buffers |
| * are all included in this value. |
| */ |
| kstat_named_t arcstat_anon_size; |
| /* |
| * Number of bytes consumed by ARC buffers that meet the |
| * following criteria: backing buffers of type ARC_BUFC_DATA, |
| * residing in the arc_anon state, and are eligible for eviction |
| * (e.g. have no outstanding holds on the buffer). |
| */ |
| kstat_named_t arcstat_anon_evictable_data; |
| /* |
| * Number of bytes consumed by ARC buffers that meet the |
| * following criteria: backing buffers of type ARC_BUFC_METADATA, |
| * residing in the arc_anon state, and are eligible for eviction |
| * (e.g. have no outstanding holds on the buffer). |
| */ |
| kstat_named_t arcstat_anon_evictable_metadata; |
| /* |
| * Total number of bytes consumed by ARC buffers residing in the |
| * arc_mru state. This includes *all* buffers in the arc_mru |
| * state; e.g. data, metadata, evictable, and unevictable buffers |
| * are all included in this value. |
| */ |
| kstat_named_t arcstat_mru_size; |
| /* |
| * Number of bytes consumed by ARC buffers that meet the |
| * following criteria: backing buffers of type ARC_BUFC_DATA, |
| * residing in the arc_mru state, and are eligible for eviction |
| * (e.g. have no outstanding holds on the buffer). |
| */ |
| kstat_named_t arcstat_mru_evictable_data; |
| /* |
| * Number of bytes consumed by ARC buffers that meet the |
| * following criteria: backing buffers of type ARC_BUFC_METADATA, |
| * residing in the arc_mru state, and are eligible for eviction |
| * (e.g. have no outstanding holds on the buffer). |
| */ |
| kstat_named_t arcstat_mru_evictable_metadata; |
| /* |
| * Total number of bytes that *would have been* consumed by ARC |
| * buffers in the arc_mru_ghost state. The key thing to note |
| * here, is the fact that this size doesn't actually indicate |
| * RAM consumption. The ghost lists only consist of headers and |
| * don't actually have ARC buffers linked off of these headers. |
| * Thus, *if* the headers had associated ARC buffers, these |
| * buffers *would have* consumed this number of bytes. |
| */ |
| kstat_named_t arcstat_mru_ghost_size; |
| /* |
| * Number of bytes that *would have been* consumed by ARC |
| * buffers that are eligible for eviction, of type |
| * ARC_BUFC_DATA, and linked off the arc_mru_ghost state. |
| */ |
| kstat_named_t arcstat_mru_ghost_evictable_data; |
| /* |
| * Number of bytes that *would have been* consumed by ARC |
| * buffers that are eligible for eviction, of type |
| * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. |
| */ |
| kstat_named_t arcstat_mru_ghost_evictable_metadata; |
| /* |
| * Total number of bytes consumed by ARC buffers residing in the |
| * arc_mfu state. This includes *all* buffers in the arc_mfu |
| * state; e.g. data, metadata, evictable, and unevictable buffers |
| * are all included in this value. |
| */ |
| kstat_named_t arcstat_mfu_size; |
| /* |
| * Number of bytes consumed by ARC buffers that are eligible for |
| * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu |
| * state. |
| */ |
| kstat_named_t arcstat_mfu_evictable_data; |
| /* |
| * Number of bytes consumed by ARC buffers that are eligible for |
| * eviction, of type ARC_BUFC_METADATA, and reside in the |
| * arc_mfu state. |
| */ |
| kstat_named_t arcstat_mfu_evictable_metadata; |
| /* |
| * Total number of bytes that *would have been* consumed by ARC |
| * buffers in the arc_mfu_ghost state. See the comment above |
| * arcstat_mru_ghost_size for more details. |
| */ |
| kstat_named_t arcstat_mfu_ghost_size; |
| /* |
| * Number of bytes that *would have been* consumed by ARC |
| * buffers that are eligible for eviction, of type |
| * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state. |
| */ |
| kstat_named_t arcstat_mfu_ghost_evictable_data; |
| /* |
| * Number of bytes that *would have been* consumed by ARC |
| * buffers that are eligible for eviction, of type |
| * ARC_BUFC_METADATA, and linked off the arc_mfu_ghost state. |
| */ |
| kstat_named_t arcstat_mfu_ghost_evictable_metadata; |
| kstat_named_t arcstat_l2_hits; |
| kstat_named_t arcstat_l2_misses; |
| /* |
| * Allocated size (in bytes) of L2ARC cached buffers by ARC state. |
| */ |
| kstat_named_t arcstat_l2_prefetch_asize; |
| kstat_named_t arcstat_l2_mru_asize; |
| kstat_named_t arcstat_l2_mfu_asize; |
| /* |
| * Allocated size (in bytes) of L2ARC cached buffers by buffer content |
| * type. |
| */ |
| kstat_named_t arcstat_l2_bufc_data_asize; |
| kstat_named_t arcstat_l2_bufc_metadata_asize; |
| kstat_named_t arcstat_l2_feeds; |
| kstat_named_t arcstat_l2_rw_clash; |
| kstat_named_t arcstat_l2_read_bytes; |
| kstat_named_t arcstat_l2_write_bytes; |
| kstat_named_t arcstat_l2_writes_sent; |
| kstat_named_t arcstat_l2_writes_done; |
| kstat_named_t arcstat_l2_writes_error; |
| kstat_named_t arcstat_l2_writes_lock_retry; |
| kstat_named_t arcstat_l2_evict_lock_retry; |
| kstat_named_t arcstat_l2_evict_reading; |
| kstat_named_t arcstat_l2_evict_l1cached; |
| kstat_named_t arcstat_l2_free_on_write; |
| kstat_named_t arcstat_l2_abort_lowmem; |
| kstat_named_t arcstat_l2_cksum_bad; |
| kstat_named_t arcstat_l2_io_error; |
| kstat_named_t arcstat_l2_lsize; |
| kstat_named_t arcstat_l2_psize; |
| kstat_named_t arcstat_l2_hdr_size; |
| /* |
| * Number of L2ARC log blocks written. These are used for restoring the |
| * L2ARC. Updated during writing of L2ARC log blocks. |
| */ |
| kstat_named_t arcstat_l2_log_blk_writes; |
| /* |
| * Moving average of the aligned size of the L2ARC log blocks, in |
| * bytes. Updated during L2ARC rebuild and during writing of L2ARC |
| * log blocks. |
| */ |
| kstat_named_t arcstat_l2_log_blk_avg_asize; |
| /* Aligned size of L2ARC log blocks on L2ARC devices. */ |
| kstat_named_t arcstat_l2_log_blk_asize; |
| /* Number of L2ARC log blocks present on L2ARC devices. */ |
| kstat_named_t arcstat_l2_log_blk_count; |
| /* |
| * Moving average of the aligned size of L2ARC restored data, in bytes, |
| * to the aligned size of their metadata in L2ARC, in bytes. |
| * Updated during L2ARC rebuild and during writing of L2ARC log blocks. |
| */ |
| kstat_named_t arcstat_l2_data_to_meta_ratio; |
| /* |
| * Number of times the L2ARC rebuild was successful for an L2ARC device. |
| */ |
| kstat_named_t arcstat_l2_rebuild_success; |
| /* |
| * Number of times the L2ARC rebuild failed because the device header |
| * was in an unsupported format or corrupted. |
| */ |
| kstat_named_t arcstat_l2_rebuild_abort_unsupported; |
| /* |
| * Number of times the L2ARC rebuild failed because of IO errors |
| * while reading a log block. |
| */ |
| kstat_named_t arcstat_l2_rebuild_abort_io_errors; |
| /* |
| * Number of times the L2ARC rebuild failed because of IO errors when |
| * reading the device header. |
| */ |
| kstat_named_t arcstat_l2_rebuild_abort_dh_errors; |
| /* |
| * Number of L2ARC log blocks which failed to be restored due to |
| * checksum errors. |
| */ |
| kstat_named_t arcstat_l2_rebuild_abort_cksum_lb_errors; |
| /* |
| * Number of times the L2ARC rebuild was aborted due to low system |
| * memory. |
| */ |
| kstat_named_t arcstat_l2_rebuild_abort_lowmem; |
| /* Logical size of L2ARC restored data, in bytes. */ |
| kstat_named_t arcstat_l2_rebuild_size; |
| /* Aligned size of L2ARC restored data, in bytes. */ |
| kstat_named_t arcstat_l2_rebuild_asize; |
| /* |
| * Number of L2ARC log entries (buffers) that were successfully |
| * restored in ARC. |
| */ |
| kstat_named_t arcstat_l2_rebuild_bufs; |
| /* |
| * Number of L2ARC log entries (buffers) already cached in ARC. These |
| * were not restored again. |
| */ |
| kstat_named_t arcstat_l2_rebuild_bufs_precached; |
| /* |
| * Number of L2ARC log blocks that were restored successfully. Each |
| * log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers. |
| */ |
| kstat_named_t arcstat_l2_rebuild_log_blks; |
| kstat_named_t arcstat_memory_throttle_count; |
| kstat_named_t arcstat_memory_direct_count; |
| kstat_named_t arcstat_memory_indirect_count; |
| kstat_named_t arcstat_memory_all_bytes; |
| kstat_named_t arcstat_memory_free_bytes; |
| kstat_named_t arcstat_memory_available_bytes; |
| kstat_named_t arcstat_no_grow; |
| kstat_named_t arcstat_tempreserve; |
| kstat_named_t arcstat_loaned_bytes; |
| kstat_named_t arcstat_prune; |
| kstat_named_t arcstat_meta_used; |
| kstat_named_t arcstat_meta_limit; |
| kstat_named_t arcstat_dnode_limit; |
| kstat_named_t arcstat_meta_max; |
| kstat_named_t arcstat_meta_min; |
| kstat_named_t arcstat_async_upgrade_sync; |
| kstat_named_t arcstat_demand_hit_predictive_prefetch; |
| kstat_named_t arcstat_demand_hit_prescient_prefetch; |
| kstat_named_t arcstat_need_free; |
| kstat_named_t arcstat_sys_free; |
| kstat_named_t arcstat_raw_size; |
| kstat_named_t arcstat_cached_only_in_progress; |
| kstat_named_t arcstat_abd_chunk_waste_size; |
| } arc_stats_t; |
| |
| typedef struct arc_sums { /* |
| * Accumulators for ARC statistics. ARCSTAT_INCR(stat, val) expands to |
| * wmsum_add(&arc_sums.stat, (val)), so it addresses a member of this |
| * struct by name. Members reuse the names of the matching kstat_named_t |
| * fields in arc_stats_t; not every arc_stats_t field has a counterpart |
| * here (e.g. arcstat_no_grow and arcstat_tempreserve do not). |
| * Four members are aggsum_t rather than wmsum_t: arcstat_size, |
| * arcstat_dnode_size, arcstat_l2_hdr_size and arcstat_meta_used. |
| * NOTE(review): wmsum_add() presumably takes a wmsum_t pointer, so those |
| * four would need the aggsum interface instead -- confirm in arc.c. |
| * NOTE(review): how these totals reach arc_stats is not visible here. |
| */ |
| wmsum_t arcstat_hits; |
| wmsum_t arcstat_misses; |
| wmsum_t arcstat_demand_data_hits; |
| wmsum_t arcstat_demand_data_misses; |
| wmsum_t arcstat_demand_metadata_hits; |
| wmsum_t arcstat_demand_metadata_misses; |
| wmsum_t arcstat_prefetch_data_hits; |
| wmsum_t arcstat_prefetch_data_misses; |
| wmsum_t arcstat_prefetch_metadata_hits; |
| wmsum_t arcstat_prefetch_metadata_misses; |
| wmsum_t arcstat_mru_hits; |
| wmsum_t arcstat_mru_ghost_hits; |
| wmsum_t arcstat_mfu_hits; |
| wmsum_t arcstat_mfu_ghost_hits; |
| wmsum_t arcstat_deleted; |
| wmsum_t arcstat_mutex_miss; |
| wmsum_t arcstat_access_skip; |
| wmsum_t arcstat_evict_skip; |
| wmsum_t arcstat_evict_not_enough; |
| wmsum_t arcstat_evict_l2_cached; |
| wmsum_t arcstat_evict_l2_eligible; |
| wmsum_t arcstat_evict_l2_eligible_mfu; |
| wmsum_t arcstat_evict_l2_eligible_mru; |
| wmsum_t arcstat_evict_l2_ineligible; |
| wmsum_t arcstat_evict_l2_skip; |
| wmsum_t arcstat_hash_collisions; |
| wmsum_t arcstat_hash_chains; |
| aggsum_t arcstat_size; /* aggsum_t member, see note at top of struct */ |
| wmsum_t arcstat_compressed_size; |
| wmsum_t arcstat_uncompressed_size; |
| wmsum_t arcstat_overhead_size; |
| wmsum_t arcstat_hdr_size; |
| wmsum_t arcstat_data_size; |
| wmsum_t arcstat_metadata_size; |
| wmsum_t arcstat_dbuf_size; |
| aggsum_t arcstat_dnode_size; /* aggsum_t member, see note at top of struct */ |
| wmsum_t arcstat_bonus_size; |
| wmsum_t arcstat_l2_hits; |
| wmsum_t arcstat_l2_misses; |
| wmsum_t arcstat_l2_prefetch_asize; |
| wmsum_t arcstat_l2_mru_asize; |
| wmsum_t arcstat_l2_mfu_asize; |
| wmsum_t arcstat_l2_bufc_data_asize; |
| wmsum_t arcstat_l2_bufc_metadata_asize; |
| wmsum_t arcstat_l2_feeds; |
| wmsum_t arcstat_l2_rw_clash; |
| wmsum_t arcstat_l2_read_bytes; |
| wmsum_t arcstat_l2_write_bytes; |
| wmsum_t arcstat_l2_writes_sent; |
| wmsum_t arcstat_l2_writes_done; |
| wmsum_t arcstat_l2_writes_error; |
| wmsum_t arcstat_l2_writes_lock_retry; |
| wmsum_t arcstat_l2_evict_lock_retry; |
| wmsum_t arcstat_l2_evict_reading; |
| wmsum_t arcstat_l2_evict_l1cached; |
| wmsum_t arcstat_l2_free_on_write; |
| wmsum_t arcstat_l2_abort_lowmem; |
| wmsum_t arcstat_l2_cksum_bad; |
| wmsum_t arcstat_l2_io_error; |
| wmsum_t arcstat_l2_lsize; |
| wmsum_t arcstat_l2_psize; |
| aggsum_t arcstat_l2_hdr_size; /* aggsum_t member, see note at top of struct */ |
| wmsum_t arcstat_l2_log_blk_writes; |
| wmsum_t arcstat_l2_log_blk_asize; |
| wmsum_t arcstat_l2_log_blk_count; |
| wmsum_t arcstat_l2_rebuild_success; |
| wmsum_t arcstat_l2_rebuild_abort_unsupported; |
| wmsum_t arcstat_l2_rebuild_abort_io_errors; |
| wmsum_t arcstat_l2_rebuild_abort_dh_errors; |
| wmsum_t arcstat_l2_rebuild_abort_cksum_lb_errors; |
| wmsum_t arcstat_l2_rebuild_abort_lowmem; |
| wmsum_t arcstat_l2_rebuild_size; |
| wmsum_t arcstat_l2_rebuild_asize; |
| wmsum_t arcstat_l2_rebuild_bufs; |
| wmsum_t arcstat_l2_rebuild_bufs_precached; |
| wmsum_t arcstat_l2_rebuild_log_blks; |
| wmsum_t arcstat_memory_throttle_count; |
| wmsum_t arcstat_memory_direct_count; |
| wmsum_t arcstat_memory_indirect_count; |
| wmsum_t arcstat_prune; |
| aggsum_t arcstat_meta_used; /* aggsum_t member, see note at top of struct */ |
| wmsum_t arcstat_async_upgrade_sync; |
| wmsum_t arcstat_demand_hit_predictive_prefetch; |
| wmsum_t arcstat_demand_hit_prescient_prefetch; |
| wmsum_t arcstat_raw_size; |
| wmsum_t arcstat_cached_only_in_progress; |
| wmsum_t arcstat_abd_chunk_waste_size; |
| } arc_sums_t; |
| |
| typedef struct arc_evict_waiter { /* |
| * One waiter record. The member types indicate it can be linked on a |
| * list and carries its own condition variable. |
| * NOTE(review): presumably used by arc_wait_for_eviction() -- confirm |
| * against the definition in arc.c. |
| */ |
| list_node_t aew_node; /* list linkage for this record */ |
| kcondvar_t aew_cv; /* condition variable belonging to this waiter */ |
| uint64_t aew_count; /* NOTE(review): meaning of this count is not visible here */ |
| } arc_evict_waiter_t; |
| |
| #define ARCSTAT(stat) (arc_stats.stat.value.ui64) /* ui64 value of the named field of arc_stats; an lvalue */ |
| |
| #define ARCSTAT_INCR(stat, val) \ |
| wmsum_add(&arc_sums.stat, (val)) /* adds val to arc_sums.<stat>; note: arc_sums, not arc_stats */ |
| |
| #define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) /* add one to arc_sums.<stat> */ |
| #define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) /* subtract one from arc_sums.<stat> */ |
| |
| #define arc_no_grow ARCSTAT(arcstat_no_grow) /* do not grow cache size */ |
| #define arc_p ARCSTAT(arcstat_p) /* target size of MRU */ |
| #define arc_c ARCSTAT(arcstat_c) /* target size of cache */ |
| #define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */ |
| #define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */ |
| #define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */ /* |
| * All six names above expand through ARCSTAT() to |
| * arc_stats.<stat>.value.ui64, so code can read and assign them like |
| * ordinary variables while the value itself is stored inside arc_stats. |
| */ |
| |
| #define arc_anon (&ARC_anon) /* |
| * Pointer-valued shorthands: each arc_<state> macro is the address of the |
| * ARC_<state> object of the same name. |
| * NOTE(review): in the visible part of this header only ARC_mru and |
| * ARC_mfu have extern declarations; the other four objects must be in |
| * scope wherever their macro is expanded -- confirm where they live. |
| */ |
| #define arc_mru (&ARC_mru) |
| #define arc_mru_ghost (&ARC_mru_ghost) |
| #define arc_mfu (&ARC_mfu) |
| #define arc_mfu_ghost (&ARC_mfu_ghost) |
| #define arc_l2c_only (&ARC_l2c_only) |
| |
| extern taskq_t *arc_prune_taskq; /* |
| * Shared ARC globals. These are declarations only; the storage for each |
| * object is defined in some other translation unit. |
| */ |
| extern arc_stats_t arc_stats; /* object the ARCSTAT() macro reads and writes */ |
| extern arc_sums_t arc_sums; /* object the ARCSTAT_INCR() macro updates */ |
| extern hrtime_t arc_growtime; |
| extern boolean_t arc_warm; |
| extern int arc_grow_retry; |
| extern int arc_no_grow_shift; |
| extern int arc_shrink_shift; |
| extern kmutex_t arc_prune_mtx; /* NOTE(review): presumably guards arc_prune_list -- confirm */ |
| extern list_t arc_prune_list; |
| extern arc_state_t ARC_mfu; /* object whose address the arc_mfu macro yields */ |
| extern arc_state_t ARC_mru; /* object whose address the arc_mru macro yields */ |
| extern uint_t zfs_arc_pc_percent; |
| extern int arc_lotsfree_percent; |
| extern unsigned long zfs_arc_min; /* NOTE(review): presumably the tunable set via param_set_arc_min -- confirm */ |
| extern unsigned long zfs_arc_max; /* NOTE(review): presumably the tunable set via param_set_arc_max -- confirm */ |
| |
| extern void arc_reduce_target_size(int64_t to_free); /* NOTE(review): to_free is presumably a byte count -- confirm */ |
| extern boolean_t arc_reclaim_needed(void); /* takes no arguments, returns a boolean_t */ |
| extern void arc_kmem_reap_soon(void); |
| extern void arc_wait_for_eviction(uint64_t, boolean_t); /* parameters are unnamed here; see the definition for their meaning */ |
| |
| extern void arc_lowmem_init(void); /* declared as a pair with arc_lowmem_fini() */ |
| extern void arc_lowmem_fini(void); |
| extern void arc_prune_async(int64_t); /* parameter is unnamed here; see the definition for its meaning */ |
| extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg); /* NOTE(review): int result is presumably 0 or an errno -- confirm */ |
| extern uint64_t arc_free_memory(void); /* unsigned result */ |
| extern int64_t arc_available_memory(void); /* signed result; NOTE(review): presumably negative on a shortfall -- confirm */ |
| extern void arc_tuning_update(boolean_t); /* parameter is unnamed here; see the definition for its meaning */ |
| extern void arc_register_hotplug(void); /* declared as a pair with arc_unregister_hotplug() */ |
| extern void arc_unregister_hotplug(void); |
| |
| extern int param_set_arc_long(ZFS_MODULE_PARAM_ARGS); /* |
| * The four param_set_arc_* prototypes share one parameter list, supplied |
| * by the ZFS_MODULE_PARAM_ARGS macro, which is defined outside this file. |
| * NOTE(review): presumably module-parameter setters -- confirm. |
| */ |
| extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS); |
| extern int param_set_arc_min(ZFS_MODULE_PARAM_ARGS); |
| extern int param_set_arc_max(ZFS_MODULE_PARAM_ARGS); |
| |
| /* used in zdb.c */ |
| boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev, /* dev is passed non-const */ |
| const l2arc_log_blkptr_t *lbp); /* lbp is const-qualified: not modified through this pointer */ |
| |
| /* used in vdev_trim.c */ |
| void l2arc_dev_hdr_update(l2arc_dev_t *dev); |
| l2arc_dev_t *l2arc_vdev_get(vdev_t *vd); /* NOTE(review): whether this can return NULL is not visible here -- confirm */ |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #endif /* _SYS_ARC_IMPL_H */ |