| /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
| |
| #include <errno.h> |
| #include <pthread.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| |
| #include "alloc-util.h" |
| #include "fileio.h" |
| #include "hashmap.h" |
| #include "macro.h" |
| #include "memory-util.h" |
| #include "mempool.h" |
| #include "missing_syscall.h" |
| #include "process-util.h" |
| #include "random-util.h" |
| #include "set.h" |
| #include "siphash24.h" |
| #include "string-util.h" |
| #include "strv.h" |
| |
| #if ENABLE_DEBUG_HASHMAP |
| #include "list.h" |
| #endif |
| |
| /* |
| * Implementation of hashmaps. |
| * Addressing: open |
| * - uses less RAM compared to closed addressing (chaining), because |
| * our entries are small (especially in Sets, which tend to contain |
| * the majority of entries in systemd). |
| * Collision resolution: Robin Hood |
| * - tends to equalize displacement of entries from their optimal buckets. |
| * Probe sequence: linear |
| * - though theoretically worse than random probing/uniform hashing/double |
| * hashing, it is good for cache locality. |
| * |
| * References: |
| * Celis, P. 1986. Robin Hood Hashing. |
 * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada.
| * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf |
| * - The results are derived for random probing. Suggests deletion with |
| * tombstones and two mean-centered search methods. None of that works |
| * well for linear probing. |
| * |
| * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies. |
| * ACM Trans. Algorithms 1, 2 (October 2005), 177-213. |
| * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964 |
| * http://www.math.uu.se/~svante/papers/sj157.pdf |
| * - Applies to Robin Hood with linear probing. Contains remarks on |
| * the unsuitability of mean-centered search with linear probing. |
| * |
| * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing. |
| * ACM Trans. Algorithms 1, 2 (October 2005), 214-242. |
| * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965 |
| * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes |
| * in a successful search), and Janson writes about displacement. C = d + 1. |
| * |
| * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion. |
| * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ |
| * - Explanation of backward shift deletion with pictures. |
| * |
| * Khuong, P. 2013. The Other Robin Hood Hashing. |
| * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/ |
| * - Short summary of random vs. linear probing, and tombstones vs. backward shift. |
| */ |
| |
| /* |
| * XXX Ideas for improvement: |
| * For unordered hashmaps, randomize iteration order, similarly to Perl: |
| * http://blog.booking.com/hardening-perls-hash-function.html |
| */ |
| |
| /* INV_KEEP_FREE = 1 / (1 - max_load_factor) |
| * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */ |
| #define INV_KEEP_FREE 5U |
| |
| /* Fields common to entries of all hashmap/set types */ |
| struct hashmap_base_entry { |
| const void *key; |
| }; |
| |
| /* Entry types for specific hashmap/set types |
| * hashmap_base_entry must be at the beginning of each entry struct. */ |
| |
| struct plain_hashmap_entry { |
| struct hashmap_base_entry b; |
| void *value; |
| }; |
| |
| struct ordered_hashmap_entry { |
| struct plain_hashmap_entry p; |
| unsigned iterate_next, iterate_previous; |
| }; |
| |
| struct set_entry { |
| struct hashmap_base_entry b; |
| }; |
| |
| /* In several functions it is advantageous to have the hash table extended |
| * virtually by a couple of additional buckets. We reserve special index values |
| * for these "swap" buckets. */ |
| #define _IDX_SWAP_BEGIN (UINT_MAX - 3) |
| #define IDX_PUT (_IDX_SWAP_BEGIN + 0) |
| #define IDX_TMP (_IDX_SWAP_BEGIN + 1) |
| #define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2) |
| |
| #define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */ |
| #define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */ |
| |
| assert_cc(IDX_FIRST == _IDX_SWAP_END); |
| assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST); |
| |
| /* Storage space for the "swap" buckets. |
| * All entry types can fit into an ordered_hashmap_entry. */ |
| struct swap_entries { |
| struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN]; |
| }; |
| |
| /* Distance from Initial Bucket */ |
| typedef uint8_t dib_raw_t; |
| #define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */ |
| #define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */ |
| #define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */ |
| #define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */ |
| |
| #define DIB_FREE UINT_MAX |
| |
| #if ENABLE_DEBUG_HASHMAP |
| struct hashmap_debug_info { |
| LIST_FIELDS(struct hashmap_debug_info, debug_list); |
| unsigned max_entries; /* high watermark of n_entries */ |
| |
| /* who allocated this hashmap */ |
| int line; |
| const char *file; |
| const char *func; |
| |
| /* fields to detect modification while iterating */ |
| unsigned put_count; /* counts puts into the hashmap */ |
| unsigned rem_count; /* counts removals from hashmap */ |
| unsigned last_rem_idx; /* remembers last removal index */ |
| }; |
| |
| /* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */ |
| static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list); |
| static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER; |
| #endif |
| |
| enum HashmapType { |
| HASHMAP_TYPE_PLAIN, |
| HASHMAP_TYPE_ORDERED, |
| HASHMAP_TYPE_SET, |
| _HASHMAP_TYPE_MAX |
| }; |
| |
| struct _packed_ indirect_storage { |
| void *storage; /* where buckets and DIBs are stored */ |
| uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */ |
| |
| unsigned n_entries; /* number of stored entries */ |
| unsigned n_buckets; /* number of buckets */ |
| |
| unsigned idx_lowest_entry; /* Index below which all buckets are free. |
| Makes "while(hashmap_steal_first())" loops |
| O(n) instead of O(n^2) for unordered hashmaps. */ |
| uint8_t _pad[3]; /* padding for the whole HashmapBase */ |
| /* The bitfields in HashmapBase complete the alignment of the whole thing. */ |
| }; |
| |
| struct direct_storage { |
| /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit. |
| * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit, |
| * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */ |
| uint8_t storage[sizeof(struct indirect_storage)]; |
| }; |
| |
| #define DIRECT_BUCKETS(entry_t) \ |
| (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t))) |
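
/* For example, on 64bit (see the size note above): sizeof(struct direct_storage)
 * is 39, sizeof(struct set_entry) is 8 and sizeof(dib_raw_t) is 1, hence
 * DIRECT_BUCKETS(struct set_entry) == 39 / 9 == 4. */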
| |
| /* We should be able to store at least one entry directly. */ |
| assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1); |
| |
| /* We have 3 bits for n_direct_entries. */ |
| assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3)); |
| |
| /* Hashmaps with directly stored entries all use this shared hash key. |
| * It's no big deal if the key is guessed, because there can be only |
| * a handful of directly stored entries in a hashmap. When a hashmap |
| * outgrows direct storage, it gets its own key for indirect storage. */ |
| static uint8_t shared_hash_key[HASH_KEY_SIZE]; |
| |
| /* Fields that all hashmap/set types must have */ |
| struct HashmapBase { |
| const struct hash_ops *hash_ops; /* hash and compare ops to use */ |
| |
| union _packed_ { |
| struct indirect_storage indirect; /* if has_indirect */ |
| struct direct_storage direct; /* if !has_indirect */ |
| }; |
| |
| enum HashmapType type:2; /* HASHMAP_TYPE_* */ |
| bool has_indirect:1; /* whether indirect storage is used */ |
| unsigned n_direct_entries:3; /* Number of entries in direct storage. |
| * Only valid if !has_indirect. */ |
| bool from_pool:1; /* whether was allocated from mempool */ |
| bool dirty:1; /* whether dirtied since last iterated_cache_get() */ |
| bool cached:1; /* whether this hashmap is being cached */ |
| |
| #if ENABLE_DEBUG_HASHMAP |
| struct hashmap_debug_info debug; |
| #endif |
| }; |
| |
| /* Specific hash types |
| * HashmapBase must be at the beginning of each hashmap struct. */ |
| |
| struct Hashmap { |
| struct HashmapBase b; |
| }; |
| |
| struct OrderedHashmap { |
| struct HashmapBase b; |
| unsigned iterate_list_head, iterate_list_tail; |
| }; |
| |
| struct Set { |
| struct HashmapBase b; |
| }; |
| |
| typedef struct CacheMem { |
| const void **ptr; |
| size_t n_populated; |
| bool active:1; |
| } CacheMem; |
| |
| struct IteratedCache { |
| HashmapBase *hashmap; |
| CacheMem keys, values; |
| }; |
| |
| DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8); |
| DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8); |
| /* No need for a separate Set pool */ |
| assert_cc(sizeof(Hashmap) == sizeof(Set)); |
| |
| struct hashmap_type_info { |
| size_t head_size; |
| size_t entry_size; |
| struct mempool *mempool; |
| unsigned n_direct_buckets; |
| }; |
| |
| static _used_ const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = { |
| [HASHMAP_TYPE_PLAIN] = { |
| .head_size = sizeof(Hashmap), |
| .entry_size = sizeof(struct plain_hashmap_entry), |
| .mempool = &hashmap_pool, |
| .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry), |
| }, |
| [HASHMAP_TYPE_ORDERED] = { |
| .head_size = sizeof(OrderedHashmap), |
| .entry_size = sizeof(struct ordered_hashmap_entry), |
| .mempool = &ordered_hashmap_pool, |
| .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry), |
| }, |
| [HASHMAP_TYPE_SET] = { |
| .head_size = sizeof(Set), |
| .entry_size = sizeof(struct set_entry), |
| .mempool = &hashmap_pool, |
| .n_direct_buckets = DIRECT_BUCKETS(struct set_entry), |
| }, |
| }; |
| |
| #if VALGRIND |
| _destructor_ static void cleanup_pools(void) { |
| _cleanup_free_ char *t = NULL; |
| int r; |
| |
| /* Be nice to valgrind */ |
| |
| /* The pool is only allocated by the main thread, but the memory can |
| * be passed to other threads. Let's clean up if we are the main thread |
| * and no other threads are live. */ |
| /* We build our own is_main_thread() here, which doesn't use C11 |
| * TLS based caching of the result. That's because valgrind apparently |
| * doesn't like malloc() (which C11 TLS internally uses) to be called |
         * from a GCC destructor. */
| if (getpid() != gettid()) |
| return; |
| |
| r = get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t); |
| if (r < 0 || !streq(t, "1")) |
| return; |
| |
| mempool_drop(&hashmap_pool); |
| mempool_drop(&ordered_hashmap_pool); |
| } |
| #endif |
| |
| static unsigned n_buckets(HashmapBase *h) { |
| return h->has_indirect ? h->indirect.n_buckets |
| : hashmap_type_info[h->type].n_direct_buckets; |
| } |
| |
| static unsigned n_entries(HashmapBase *h) { |
| return h->has_indirect ? h->indirect.n_entries |
| : h->n_direct_entries; |
| } |
| |
| static void n_entries_inc(HashmapBase *h) { |
| if (h->has_indirect) |
| h->indirect.n_entries++; |
| else |
| h->n_direct_entries++; |
| } |
| |
| static void n_entries_dec(HashmapBase *h) { |
| if (h->has_indirect) |
| h->indirect.n_entries--; |
| else |
| h->n_direct_entries--; |
| } |
| |
| static void* storage_ptr(HashmapBase *h) { |
| return h->has_indirect ? h->indirect.storage |
| : h->direct.storage; |
| } |
| |
| static uint8_t* hash_key(HashmapBase *h) { |
| return h->has_indirect ? h->indirect.hash_key |
| : shared_hash_key; |
| } |
| |
| static unsigned base_bucket_hash(HashmapBase *h, const void *p) { |
| struct siphash state; |
| uint64_t hash; |
| |
| siphash24_init(&state, hash_key(h)); |
| |
| h->hash_ops->hash(p, &state); |
| |
| hash = siphash24_finalize(&state); |
| |
| return (unsigned) (hash % n_buckets(h)); |
| } |
| #define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p) |
| |
| static void base_set_dirty(HashmapBase *h) { |
| h->dirty = true; |
| } |
| #define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h)) |
| |
| static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) { |
| static uint8_t current[HASH_KEY_SIZE]; |
| static bool current_initialized = false; |
| |
| /* Returns a hash function key to use. In order to keep things |
| * fast we will not generate a new key each time we allocate a |
| * new hash table. Instead, we'll just reuse the most recently |
| * generated one, except if we never generated one or when we |
         * are rehashing an entire hash table because it reached its
         * maximum fill level. */
| |
| if (!current_initialized || !reuse_is_ok) { |
| random_bytes(current, sizeof(current)); |
| current_initialized = true; |
| } |
| |
| memcpy(hash_key, current, sizeof(current)); |
| } |
| |
| static struct hashmap_base_entry* bucket_at(HashmapBase *h, unsigned idx) { |
| return (struct hashmap_base_entry*) |
| ((uint8_t*) storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size); |
| } |
| |
| static struct plain_hashmap_entry* plain_bucket_at(Hashmap *h, unsigned idx) { |
| return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx); |
| } |
| |
| static struct ordered_hashmap_entry* ordered_bucket_at(OrderedHashmap *h, unsigned idx) { |
| return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx); |
| } |
| |
| static struct set_entry *set_bucket_at(Set *h, unsigned idx) { |
| return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx); |
| } |
| |
| static struct ordered_hashmap_entry* bucket_at_swap(struct swap_entries *swap, unsigned idx) { |
| return &swap->e[idx - _IDX_SWAP_BEGIN]; |
| } |
| |
| /* Returns a pointer to the bucket at index idx. |
| * Understands real indexes and swap indexes, hence "_virtual". */ |
| static struct hashmap_base_entry* bucket_at_virtual(HashmapBase *h, struct swap_entries *swap, |
| unsigned idx) { |
| if (idx < _IDX_SWAP_BEGIN) |
| return bucket_at(h, idx); |
| |
| if (idx < _IDX_SWAP_END) |
| return &bucket_at_swap(swap, idx)->p.b; |
| |
| assert_not_reached("Invalid index"); |
| } |
| |
| static dib_raw_t* dib_raw_ptr(HashmapBase *h) { |
| return (dib_raw_t*) |
| ((uint8_t*) storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h)); |
| } |
| |
| static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) { |
| return idx >= from ? idx - from |
| : n_buckets(h) + idx - from; |
| } |
| |
| static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) { |
| unsigned initial_bucket; |
| |
| if (raw_dib == DIB_RAW_FREE) |
| return DIB_FREE; |
| |
| if (_likely_(raw_dib < DIB_RAW_OVERFLOW)) |
| return raw_dib; |
| |
| /* |
| * Having an overflow DIB value is very unlikely. The hash function |
| * would have to be bad. For example, in a table of size 2^24 filled |
| * to load factor 0.9 the maximum observed DIB is only about 60. |
| * In theory (assuming I used Maxima correctly), for an infinite size |
| * hash table with load factor 0.8 the probability of a given entry |
| * having DIB > 40 is 1.9e-8. |
| * This returns the correct DIB value by recomputing the hash value in |
| * the unlikely case. XXX Hitting this case could be a hint to rehash. |
| */ |
| initial_bucket = bucket_hash(h, bucket_at(h, idx)->key); |
| return bucket_distance(h, idx, initial_bucket); |
| } |
| |
| static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) { |
| dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE; |
| } |
| |
| static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) { |
| dib_raw_t *dibs; |
| |
| dibs = dib_raw_ptr(h); |
| |
| for ( ; idx < n_buckets(h); idx++) |
| if (dibs[idx] != DIB_RAW_FREE) |
| return idx; |
| |
| return IDX_NIL; |
| } |
| |
| static void bucket_mark_free(HashmapBase *h, unsigned idx) { |
| memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size); |
| bucket_set_dib(h, idx, DIB_FREE); |
| } |
| |
| static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap, |
| unsigned from, unsigned to) { |
| struct hashmap_base_entry *e_from, *e_to; |
| |
| assert(from != to); |
| |
| e_from = bucket_at_virtual(h, swap, from); |
| e_to = bucket_at_virtual(h, swap, to); |
| |
| memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size); |
| |
| if (h->type == HASHMAP_TYPE_ORDERED) { |
| OrderedHashmap *lh = (OrderedHashmap*) h; |
| struct ordered_hashmap_entry *le, *le_to; |
| |
| le_to = (struct ordered_hashmap_entry*) e_to; |
| |
| if (le_to->iterate_next != IDX_NIL) { |
| le = (struct ordered_hashmap_entry*) |
| bucket_at_virtual(h, swap, le_to->iterate_next); |
| le->iterate_previous = to; |
| } |
| |
| if (le_to->iterate_previous != IDX_NIL) { |
| le = (struct ordered_hashmap_entry*) |
| bucket_at_virtual(h, swap, le_to->iterate_previous); |
| le->iterate_next = to; |
| } |
| |
| if (lh->iterate_list_head == from) |
| lh->iterate_list_head = to; |
| if (lh->iterate_list_tail == from) |
| lh->iterate_list_tail = to; |
| } |
| } |
| |
| static unsigned next_idx(HashmapBase *h, unsigned idx) { |
| return (idx + 1U) % n_buckets(h); |
| } |
| |
| static unsigned prev_idx(HashmapBase *h, unsigned idx) { |
| return (n_buckets(h) + idx - 1U) % n_buckets(h); |
| } |
| |
| static void* entry_value(HashmapBase *h, struct hashmap_base_entry *e) { |
| switch (h->type) { |
| |
| case HASHMAP_TYPE_PLAIN: |
| case HASHMAP_TYPE_ORDERED: |
| return ((struct plain_hashmap_entry*)e)->value; |
| |
| case HASHMAP_TYPE_SET: |
| return (void*) e->key; |
| |
| default: |
| assert_not_reached("Unknown hashmap type"); |
| } |
| } |
| |
| static void base_remove_entry(HashmapBase *h, unsigned idx) { |
| unsigned left, right, prev, dib; |
| dib_raw_t raw_dib, *dibs; |
| |
| dibs = dib_raw_ptr(h); |
| assert(dibs[idx] != DIB_RAW_FREE); |
| |
| #if ENABLE_DEBUG_HASHMAP |
| h->debug.rem_count++; |
| h->debug.last_rem_idx = idx; |
| #endif |
| |
| left = idx; |
| /* Find the stop bucket ("right"). It is either free or has DIB == 0. */ |
| for (right = next_idx(h, left); ; right = next_idx(h, right)) { |
| raw_dib = dibs[right]; |
| if (IN_SET(raw_dib, 0, DIB_RAW_FREE)) |
| break; |
| |
| /* The buckets are not supposed to be all occupied and with DIB > 0. |
| * That would mean we could make everyone better off by shifting them |
| * backward. This scenario is impossible. */ |
| assert(left != right); |
| } |
| |
| if (h->type == HASHMAP_TYPE_ORDERED) { |
| OrderedHashmap *lh = (OrderedHashmap*) h; |
| struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx); |
| |
| if (le->iterate_next != IDX_NIL) |
| ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous; |
| else |
| lh->iterate_list_tail = le->iterate_previous; |
| |
| if (le->iterate_previous != IDX_NIL) |
| ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next; |
| else |
| lh->iterate_list_head = le->iterate_next; |
| } |
| |
| /* Now shift all buckets in the interval (left, right) one step backwards */ |
| for (prev = left, left = next_idx(h, left); left != right; |
| prev = left, left = next_idx(h, left)) { |
| dib = bucket_calculate_dib(h, left, dibs[left]); |
| assert(dib != 0); |
| bucket_move_entry(h, NULL, left, prev); |
| bucket_set_dib(h, prev, dib - 1); |
| } |
| |
| bucket_mark_free(h, prev); |
| n_entries_dec(h); |
| base_set_dirty(h); |
| } |
| #define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx) |
| |
| static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) { |
| struct ordered_hashmap_entry *e; |
| unsigned idx; |
| |
| assert(h); |
| assert(i); |
| |
| if (i->idx == IDX_NIL) |
| goto at_end; |
| |
| if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL) |
| goto at_end; |
| |
| if (i->idx == IDX_FIRST) { |
| idx = h->iterate_list_head; |
| e = ordered_bucket_at(h, idx); |
| } else { |
| idx = i->idx; |
| e = ordered_bucket_at(h, idx); |
| /* |
| * We allow removing the current entry while iterating, but removal may cause |
| * a backward shift. The next entry may thus move one bucket to the left. |
| * To detect when it happens, we remember the key pointer of the entry we were |
| * going to iterate next. If it does not match, there was a backward shift. |
| */ |
| if (e->p.b.key != i->next_key) { |
| idx = prev_idx(HASHMAP_BASE(h), idx); |
| e = ordered_bucket_at(h, idx); |
| } |
| assert(e->p.b.key == i->next_key); |
| } |
| |
| #if ENABLE_DEBUG_HASHMAP |
| i->prev_idx = idx; |
| #endif |
| |
| if (e->iterate_next != IDX_NIL) { |
| struct ordered_hashmap_entry *n; |
| i->idx = e->iterate_next; |
| n = ordered_bucket_at(h, i->idx); |
| i->next_key = n->p.b.key; |
| } else |
| i->idx = IDX_NIL; |
| |
| return idx; |
| |
| at_end: |
| i->idx = IDX_NIL; |
| return IDX_NIL; |
| } |
| |
| static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) { |
| unsigned idx; |
| |
| assert(h); |
| assert(i); |
| |
| if (i->idx == IDX_NIL) |
| goto at_end; |
| |
| if (i->idx == IDX_FIRST) { |
| /* fast forward to the first occupied bucket */ |
| if (h->has_indirect) { |
| i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry); |
| h->indirect.idx_lowest_entry = i->idx; |
| } else |
| i->idx = skip_free_buckets(h, 0); |
| |
| if (i->idx == IDX_NIL) |
| goto at_end; |
| } else { |
| struct hashmap_base_entry *e; |
| |
| assert(i->idx > 0); |
| |
| e = bucket_at(h, i->idx); |
| /* |
| * We allow removing the current entry while iterating, but removal may cause |
| * a backward shift. The next entry may thus move one bucket to the left. |
| * To detect when it happens, we remember the key pointer of the entry we were |
| * going to iterate next. If it does not match, there was a backward shift. |
| */ |
| if (e->key != i->next_key) |
| e = bucket_at(h, --i->idx); |
| |
| assert(e->key == i->next_key); |
| } |
| |
| idx = i->idx; |
| #if ENABLE_DEBUG_HASHMAP |
| i->prev_idx = idx; |
| #endif |
| |
| i->idx = skip_free_buckets(h, i->idx + 1); |
| if (i->idx != IDX_NIL) |
| i->next_key = bucket_at(h, i->idx)->key; |
| else |
| i->idx = IDX_NIL; |
| |
| return idx; |
| |
| at_end: |
| i->idx = IDX_NIL; |
| return IDX_NIL; |
| } |
| |
| static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) { |
| if (!h) { |
| i->idx = IDX_NIL; |
| return IDX_NIL; |
| } |
| |
| #if ENABLE_DEBUG_HASHMAP |
| if (i->idx == IDX_FIRST) { |
| i->put_count = h->debug.put_count; |
| i->rem_count = h->debug.rem_count; |
| } else { |
| /* While iterating, must not add any new entries */ |
| assert(i->put_count == h->debug.put_count); |
| /* ... or remove entries other than the current one */ |
| assert(i->rem_count == h->debug.rem_count || |
| (i->rem_count == h->debug.rem_count - 1 && |
| i->prev_idx == h->debug.last_rem_idx)); |
| /* Reset our removals counter */ |
| i->rem_count = h->debug.rem_count; |
| } |
| #endif |
| |
| return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i) |
| : hashmap_iterate_in_internal_order(h, i); |
| } |
| |
| bool _hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) { |
| struct hashmap_base_entry *e; |
| void *data; |
| unsigned idx; |
| |
| idx = hashmap_iterate_entry(h, i); |
| if (idx == IDX_NIL) { |
| if (value) |
| *value = NULL; |
| if (key) |
| *key = NULL; |
| |
| return false; |
| } |
| |
| e = bucket_at(h, idx); |
| data = entry_value(h, e); |
| if (value) |
| *value = data; |
| if (key) |
| *key = e->key; |
| |
| return true; |
| } |
| |
| #define HASHMAP_FOREACH_IDX(idx, h, i) \ |
| for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \ |
| (idx != IDX_NIL); \ |
| (idx) = hashmap_iterate_entry((h), &(i))) |
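
/* Internal usage sketch (illustration only): walking all occupied buckets of a
 * table by index, the way _hashmap_merge() and set_merge() below do:
 *
 *         Iterator i;
 *         unsigned idx;
 *
 *         HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
 *                 struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
 *
 *                 r = hashmap_put(h, pe->b.key, pe->value);
 *                 if (r < 0 && r != -EEXIST)
 *                         return r;
 *         }
 */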
| |
| IteratedCache* _hashmap_iterated_cache_new(HashmapBase *h) { |
| IteratedCache *cache; |
| |
| assert(h); |
| assert(!h->cached); |
| |
| if (h->cached) |
| return NULL; |
| |
| cache = new0(IteratedCache, 1); |
| if (!cache) |
| return NULL; |
| |
| cache->hashmap = h; |
| h->cached = true; |
| |
| return cache; |
| } |
| |
| static void reset_direct_storage(HashmapBase *h) { |
| const struct hashmap_type_info *hi = &hashmap_type_info[h->type]; |
| void *p; |
| |
| assert(!h->has_indirect); |
| |
| p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets); |
| memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets); |
| } |
| |
| static void shared_hash_key_initialize(void) { |
| random_bytes(shared_hash_key, sizeof(shared_hash_key)); |
| } |
| |
| static struct HashmapBase* hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) { |
| HashmapBase *h; |
| const struct hashmap_type_info *hi = &hashmap_type_info[type]; |
| bool up; |
| |
| up = mempool_enabled(); |
| |
| h = up ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size); |
| if (!h) |
| return NULL; |
| |
| h->type = type; |
| h->from_pool = up; |
| h->hash_ops = hash_ops ?: &trivial_hash_ops; |
| |
| if (type == HASHMAP_TYPE_ORDERED) { |
| OrderedHashmap *lh = (OrderedHashmap*)h; |
| lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL; |
| } |
| |
| reset_direct_storage(h); |
| |
| static pthread_once_t once = PTHREAD_ONCE_INIT; |
| assert_se(pthread_once(&once, shared_hash_key_initialize) == 0); |
| |
| #if ENABLE_DEBUG_HASHMAP |
| h->debug.func = func; |
| h->debug.file = file; |
| h->debug.line = line; |
| assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0); |
| LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug); |
| assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0); |
| #endif |
| |
| return h; |
| } |
| |
| Hashmap *_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { |
| return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS); |
| } |
| |
| OrderedHashmap *_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { |
| return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS); |
| } |
| |
| Set *_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { |
| return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS); |
| } |
| |
| static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops, |
| enum HashmapType type HASHMAP_DEBUG_PARAMS) { |
| HashmapBase *q; |
| |
| assert(h); |
| |
| if (*h) |
| return 0; |
| |
| q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS); |
| if (!q) |
| return -ENOMEM; |
| |
| *h = q; |
| return 1; |
| } |
| |
| int _hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { |
| return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS); |
| } |
| |
| int _ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { |
| return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS); |
| } |
| |
| int _set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { |
| return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS); |
| } |
| |
| int _hashmap_ensure_put(Hashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS) { |
| int r; |
| |
| r = _hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS); |
| if (r < 0) |
| return r; |
| |
| return hashmap_put(*h, key, value); |
| } |
| |
| int _ordered_hashmap_ensure_put(OrderedHashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS) { |
| int r; |
| |
| r = _ordered_hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS); |
| if (r < 0) |
| return r; |
| |
| return ordered_hashmap_put(*h, key, value); |
| } |
| |
| static void hashmap_free_no_clear(HashmapBase *h) { |
| assert(!h->has_indirect); |
| assert(h->n_direct_entries == 0); |
| |
| #if ENABLE_DEBUG_HASHMAP |
| assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0); |
| LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug); |
| assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0); |
| #endif |
| |
| if (h->from_pool) { |
| /* Ensure that the object didn't get migrated between threads. */ |
| assert_se(is_main_thread()); |
| mempool_free_tile(hashmap_type_info[h->type].mempool, h); |
| } else |
| free(h); |
| } |
| |
| HashmapBase* _hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) { |
| if (h) { |
| _hashmap_clear(h, default_free_key, default_free_value); |
| hashmap_free_no_clear(h); |
| } |
| |
| return NULL; |
| } |
| |
| void _hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) { |
| free_func_t free_key, free_value; |
| if (!h) |
| return; |
| |
| free_key = h->hash_ops->free_key ?: default_free_key; |
| free_value = h->hash_ops->free_value ?: default_free_value; |
| |
| if (free_key || free_value) { |
| |
| /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the |
| * hash table, and only then call the destructor functions. If these destructors then try to unregister |
| * themselves from our hash table a second time, the entry is already gone. */ |
| |
| while (_hashmap_size(h) > 0) { |
| void *k = NULL; |
| void *v; |
| |
| v = _hashmap_first_key_and_value(h, true, &k); |
| |
| if (free_key) |
| free_key(k); |
| |
| if (free_value) |
| free_value(v); |
| } |
| } |
| |
| if (h->has_indirect) { |
| free(h->indirect.storage); |
| h->has_indirect = false; |
| } |
| |
| h->n_direct_entries = 0; |
| reset_direct_storage(h); |
| |
| if (h->type == HASHMAP_TYPE_ORDERED) { |
| OrderedHashmap *lh = (OrderedHashmap*) h; |
| lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL; |
| } |
| |
| base_set_dirty(h); |
| } |
| |
| static int resize_buckets(HashmapBase *h, unsigned entries_add); |
| |
| /* |
| * Finds an empty bucket to put an entry into, starting the scan at 'idx'. |
| * Performs Robin Hood swaps as it goes. The entry to put must be placed |
| * by the caller into swap slot IDX_PUT. |
| * If used for in-place resizing, may leave a displaced entry in swap slot |
| * IDX_PUT. Caller must rehash it next. |
| * Returns: true if it left a displaced entry to rehash next in IDX_PUT, |
| * false otherwise. |
| */ |
| static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx, |
| struct swap_entries *swap) { |
| dib_raw_t raw_dib, *dibs; |
| unsigned dib, distance; |
| |
| #if ENABLE_DEBUG_HASHMAP |
| h->debug.put_count++; |
| #endif |
| |
| dibs = dib_raw_ptr(h); |
| |
| for (distance = 0; ; distance++) { |
| raw_dib = dibs[idx]; |
| if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) { |
| if (raw_dib == DIB_RAW_REHASH) |
| bucket_move_entry(h, swap, idx, IDX_TMP); |
| |
| if (h->has_indirect && h->indirect.idx_lowest_entry > idx) |
| h->indirect.idx_lowest_entry = idx; |
| |
| bucket_set_dib(h, idx, distance); |
| bucket_move_entry(h, swap, IDX_PUT, idx); |
| if (raw_dib == DIB_RAW_REHASH) { |
| bucket_move_entry(h, swap, IDX_TMP, IDX_PUT); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| dib = bucket_calculate_dib(h, idx, raw_dib); |
| |
| if (dib < distance) { |
| /* Found a wealthier entry. Go Robin Hood! */ |
| bucket_set_dib(h, idx, distance); |
| |
| /* swap the entries */ |
| bucket_move_entry(h, swap, idx, IDX_TMP); |
| bucket_move_entry(h, swap, IDX_PUT, idx); |
| bucket_move_entry(h, swap, IDX_TMP, IDX_PUT); |
| |
| distance = dib; |
| } |
| |
| idx = next_idx(h, idx); |
| } |
| } |
| |
| /* |
| * Puts an entry into a hashmap, boldly - no check whether key already exists. |
| * The caller must place the entry (only its key and value, not link indexes) |
| * in swap slot IDX_PUT. |
| * Caller must ensure: the key does not exist yet in the hashmap. |
| * that resize is not needed if !may_resize. |
| * Returns: 1 if entry was put successfully. |
| * -ENOMEM if may_resize==true and resize failed with -ENOMEM. |
| * Cannot return -ENOMEM if !may_resize. |
| */ |
| static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx, |
| struct swap_entries *swap, bool may_resize) { |
| struct ordered_hashmap_entry *new_entry; |
| int r; |
| |
| assert(idx < n_buckets(h)); |
| |
| new_entry = bucket_at_swap(swap, IDX_PUT); |
| |
| if (may_resize) { |
| r = resize_buckets(h, 1); |
| if (r < 0) |
| return r; |
| if (r > 0) |
| idx = bucket_hash(h, new_entry->p.b.key); |
| } |
| assert(n_entries(h) < n_buckets(h)); |
| |
| if (h->type == HASHMAP_TYPE_ORDERED) { |
| OrderedHashmap *lh = (OrderedHashmap*) h; |
| |
| new_entry->iterate_next = IDX_NIL; |
| new_entry->iterate_previous = lh->iterate_list_tail; |
| |
| if (lh->iterate_list_tail != IDX_NIL) { |
| struct ordered_hashmap_entry *old_tail; |
| |
| old_tail = ordered_bucket_at(lh, lh->iterate_list_tail); |
| assert(old_tail->iterate_next == IDX_NIL); |
| old_tail->iterate_next = IDX_PUT; |
| } |
| |
| lh->iterate_list_tail = IDX_PUT; |
| if (lh->iterate_list_head == IDX_NIL) |
| lh->iterate_list_head = IDX_PUT; |
| } |
| |
| assert_se(hashmap_put_robin_hood(h, idx, swap) == false); |
| |
| n_entries_inc(h); |
| #if ENABLE_DEBUG_HASHMAP |
| h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h)); |
| #endif |
| |
| base_set_dirty(h); |
| |
| return 1; |
| } |
| #define hashmap_put_boldly(h, idx, swap, may_resize) \ |
| hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize) |
| |
| /* |
| * Returns 0 if resize is not needed. |
| * 1 if successfully resized. |
| * -ENOMEM on allocation failure. |
| */ |
| static int resize_buckets(HashmapBase *h, unsigned entries_add) { |
| struct swap_entries swap; |
| void *new_storage; |
| dib_raw_t *old_dibs, *new_dibs; |
| const struct hashmap_type_info *hi; |
| unsigned idx, optimal_idx; |
| unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries; |
| uint8_t new_shift; |
| bool rehash_next; |
| |
| assert(h); |
| |
| hi = &hashmap_type_info[h->type]; |
| new_n_entries = n_entries(h) + entries_add; |
| |
| /* overflow? */ |
| if (_unlikely_(new_n_entries < entries_add)) |
| return -ENOMEM; |
| |
| /* For direct storage we allow 100% load, because it's tiny. */ |
| if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets) |
| return 0; |
| |
| /* |
| * Load factor = n/m = 1 - (1/INV_KEEP_FREE). |
         * It follows that m = n + n/(INV_KEEP_FREE - 1).
| */ |
| new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1); |
| /* overflow? */ |
| if (_unlikely_(new_n_buckets < new_n_entries)) |
| return -ENOMEM; |
| |
| if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t)))) |
| return -ENOMEM; |
| |
| old_n_buckets = n_buckets(h); |
| |
| if (_likely_(new_n_buckets <= old_n_buckets)) |
| return 0; |
| |
| new_shift = log2u_round_up(MAX( |
| new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)), |
| 2 * sizeof(struct direct_storage))); |
| |
| /* Realloc storage (buckets and DIB array). */ |
| new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL, |
| 1U << new_shift); |
| if (!new_storage) |
| return -ENOMEM; |
| |
| /* Must upgrade direct to indirect storage. */ |
| if (!h->has_indirect) { |
| memcpy(new_storage, h->direct.storage, |
| old_n_buckets * (hi->entry_size + sizeof(dib_raw_t))); |
| h->indirect.n_entries = h->n_direct_entries; |
| h->indirect.idx_lowest_entry = 0; |
| h->n_direct_entries = 0; |
| } |
| |
| /* Get a new hash key. If we've just upgraded to indirect storage, |
| * allow reusing a previously generated key. It's still a different key |
| * from the shared one that we used for direct storage. */ |
| get_hash_key(h->indirect.hash_key, !h->has_indirect); |
| |
| h->has_indirect = true; |
| h->indirect.storage = new_storage; |
| h->indirect.n_buckets = (1U << new_shift) / |
| (hi->entry_size + sizeof(dib_raw_t)); |
| |
| old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets); |
| new_dibs = dib_raw_ptr(h); |
| |
| /* |
| * Move the DIB array to the new place, replacing valid DIB values with |
| * DIB_RAW_REHASH to indicate all of the used buckets need rehashing. |
| * Note: Overlap is not possible, because we have at least doubled the |
| * number of buckets and dib_raw_t is smaller than any entry type. |
| */ |
| for (idx = 0; idx < old_n_buckets; idx++) { |
| assert(old_dibs[idx] != DIB_RAW_REHASH); |
| new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE |
| : DIB_RAW_REHASH; |
| } |
| |
| /* Zero the area of newly added entries (including the old DIB area) */ |
| memzero(bucket_at(h, old_n_buckets), |
| (n_buckets(h) - old_n_buckets) * hi->entry_size); |
| |
| /* The upper half of the new DIB array needs initialization */ |
| memset(&new_dibs[old_n_buckets], DIB_RAW_INIT, |
| (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t)); |
| |
| /* Rehash entries that need it */ |
| n_rehashed = 0; |
| for (idx = 0; idx < old_n_buckets; idx++) { |
| if (new_dibs[idx] != DIB_RAW_REHASH) |
| continue; |
| |
| optimal_idx = bucket_hash(h, bucket_at(h, idx)->key); |
| |
| /* |
| * Not much to do if by luck the entry hashes to its current |
| * location. Just set its DIB. |
| */ |
| if (optimal_idx == idx) { |
| new_dibs[idx] = 0; |
| n_rehashed++; |
| continue; |
| } |
| |
| new_dibs[idx] = DIB_RAW_FREE; |
| bucket_move_entry(h, &swap, idx, IDX_PUT); |
| /* bucket_move_entry does not clear the source */ |
| memzero(bucket_at(h, idx), hi->entry_size); |
| |
| do { |
| /* |
| * Find the new bucket for the current entry. This may make |
| * another entry homeless and load it into IDX_PUT. |
| */ |
| rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap); |
| n_rehashed++; |
| |
| /* Did the current entry displace another one? */ |
| if (rehash_next) |
| optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key); |
| } while (rehash_next); |
| } |
| |
| assert(n_rehashed == n_entries(h)); |
| |
| return 1; |
| } |
| |
| /* |
| * Finds an entry with a matching key |
| * Returns: index of the found entry, or IDX_NIL if not found. |
| */ |
| static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) { |
| struct hashmap_base_entry *e; |
| unsigned dib, distance; |
| dib_raw_t *dibs = dib_raw_ptr(h); |
| |
| assert(idx < n_buckets(h)); |
| |
| for (distance = 0; ; distance++) { |
| if (dibs[idx] == DIB_RAW_FREE) |
| return IDX_NIL; |
| |
| dib = bucket_calculate_dib(h, idx, dibs[idx]); |
| |
| if (dib < distance) |
| return IDX_NIL; |
| if (dib == distance) { |
| e = bucket_at(h, idx); |
| if (h->hash_ops->compare(e->key, key) == 0) |
| return idx; |
| } |
| |
| idx = next_idx(h, idx); |
| } |
| } |
| #define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key) |
| |
| int hashmap_put(Hashmap *h, const void *key, void *value) { |
| struct swap_entries swap; |
| struct plain_hashmap_entry *e; |
| unsigned hash, idx; |
| |
| assert(h); |
| |
| hash = bucket_hash(h, key); |
| idx = bucket_scan(h, hash, key); |
| if (idx != IDX_NIL) { |
| e = plain_bucket_at(h, idx); |
| if (e->value == value) |
| return 0; |
| return -EEXIST; |
| } |
| |
| e = &bucket_at_swap(&swap, IDX_PUT)->p; |
| e->b.key = key; |
| e->value = value; |
| return hashmap_put_boldly(h, hash, &swap, true); |
| } |
| |
| int set_put(Set *s, const void *key) { |
| struct swap_entries swap; |
| struct hashmap_base_entry *e; |
| unsigned hash, idx; |
| |
| assert(s); |
| |
| hash = bucket_hash(s, key); |
| idx = bucket_scan(s, hash, key); |
| if (idx != IDX_NIL) |
| return 0; |
| |
| e = &bucket_at_swap(&swap, IDX_PUT)->p.b; |
| e->key = key; |
| return hashmap_put_boldly(s, hash, &swap, true); |
| } |
| |
| int _set_ensure_put(Set **s, const struct hash_ops *hash_ops, const void *key HASHMAP_DEBUG_PARAMS) { |
| int r; |
| |
| r = _set_ensure_allocated(s, hash_ops HASHMAP_DEBUG_PASS_ARGS); |
| if (r < 0) |
| return r; |
| |
| return set_put(*s, key); |
| } |
| |
| int _set_ensure_consume(Set **s, const struct hash_ops *hash_ops, void *key HASHMAP_DEBUG_PARAMS) { |
| int r; |
| |
| r = _set_ensure_put(s, hash_ops, key HASHMAP_DEBUG_PASS_ARGS); |
| if (r <= 0) { |
| if (hash_ops && hash_ops->free_key) |
| hash_ops->free_key(key); |
| else |
| free(key); |
| } |
| |
| return r; |
| } |
| |
| int hashmap_replace(Hashmap *h, const void *key, void *value) { |
| struct swap_entries swap; |
| struct plain_hashmap_entry *e; |
| unsigned hash, idx; |
| |
| assert(h); |
| |
| hash = bucket_hash(h, key); |
| idx = bucket_scan(h, hash, key); |
| if (idx != IDX_NIL) { |
| e = plain_bucket_at(h, idx); |
| #if ENABLE_DEBUG_HASHMAP |
| /* Although the key is equal, the key pointer may have changed, |
| * and this would break our assumption for iterating. So count |
| * this operation as incompatible with iteration. */ |
| if (e->b.key != key) { |
| h->b.debug.put_count++; |
| h->b.debug.rem_count++; |
| h->b.debug.last_rem_idx = idx; |
| } |
| #endif |
| e->b.key = key; |
| e->value = value; |
| hashmap_set_dirty(h); |
| |
| return 0; |
| } |
| |
| e = &bucket_at_swap(&swap, IDX_PUT)->p; |
| e->b.key = key; |
| e->value = value; |
| return hashmap_put_boldly(h, hash, &swap, true); |
| } |
| |
| int hashmap_update(Hashmap *h, const void *key, void *value) { |
| struct plain_hashmap_entry *e; |
| unsigned hash, idx; |
| |
| assert(h); |
| |
| hash = bucket_hash(h, key); |
| idx = bucket_scan(h, hash, key); |
| if (idx == IDX_NIL) |
| return -ENOENT; |
| |
| e = plain_bucket_at(h, idx); |
| e->value = value; |
| hashmap_set_dirty(h); |
| |
| return 0; |
| } |
| |
| void* _hashmap_get(HashmapBase *h, const void *key) { |
| struct hashmap_base_entry *e; |
| unsigned hash, idx; |
| |
| if (!h) |
| return NULL; |
| |
| hash = bucket_hash(h, key); |
| idx = bucket_scan(h, hash, key); |
| if (idx == IDX_NIL) |
| return NULL; |
| |
| e = bucket_at(h, idx); |
| return entry_value(h, e); |
| } |
| |
| void* hashmap_get2(Hashmap *h, const void *key, void **key2) { |
| struct plain_hashmap_entry *e; |
| unsigned hash, idx; |
| |
| if (!h) |
| return NULL; |
| |
| hash = bucket_hash(h, key); |
| idx = bucket_scan(h, hash, key); |
| if (idx == IDX_NIL) |
| return NULL; |
| |
| e = plain_bucket_at(h, idx); |
| if (key2) |
| *key2 = (void*) e->b.key; |
| |
| return e->value; |
| } |
| |
| bool _hashmap_contains(HashmapBase *h, const void *key) { |
| unsigned hash; |
| |
| if (!h) |
| return false; |
| |
| hash = bucket_hash(h, key); |
| return bucket_scan(h, hash, key) != IDX_NIL; |
| } |
| |
| void* _hashmap_remove(HashmapBase *h, const void *key) { |
| struct hashmap_base_entry *e; |
| unsigned hash, idx; |
| void *data; |
| |
| if (!h) |
| return NULL; |
| |
| hash = bucket_hash(h, key); |
| idx = bucket_scan(h, hash, key); |
| if (idx == IDX_NIL) |
| return NULL; |
| |
| e = bucket_at(h, idx); |
| data = entry_value(h, e); |
| remove_entry(h, idx); |
| |
| return data; |
| } |
| |
| void* hashmap_remove2(Hashmap *h, const void *key, void **rkey) { |
| struct plain_hashmap_entry *e; |
| unsigned hash, idx; |
| void *data; |
| |
| if (!h) { |
| if (rkey) |
| *rkey = NULL; |
| return NULL; |
| } |
| |
| hash = bucket_hash(h, key); |
| idx = bucket_scan(h, hash, key); |
| if (idx == IDX_NIL) { |
| if (rkey) |
| *rkey = NULL; |
| return NULL; |
| } |
| |
| e = plain_bucket_at(h, idx); |
| data = e->value; |
| if (rkey) |
| *rkey = (void*) e->b.key; |
| |
| remove_entry(h, idx); |
| |
| return data; |
| } |
| |
| int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) { |
| struct swap_entries swap; |
| struct plain_hashmap_entry *e; |
| unsigned old_hash, new_hash, idx; |
| |
| if (!h) |
| return -ENOENT; |
| |
| old_hash = bucket_hash(h, old_key); |
| idx = bucket_scan(h, old_hash, old_key); |
| if (idx == IDX_NIL) |
| return -ENOENT; |
| |
| new_hash = bucket_hash(h, new_key); |
| if (bucket_scan(h, new_hash, new_key) != IDX_NIL) |
| return -EEXIST; |
| |
| remove_entry(h, idx); |
| |
| e = &bucket_at_swap(&swap, IDX_PUT)->p; |
| e->b.key = new_key; |
| e->value = value; |
| assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1); |
| |
| return 0; |
| } |
| |
| int set_remove_and_put(Set *s, const void *old_key, const void *new_key) { |
| struct swap_entries swap; |
| struct hashmap_base_entry *e; |
| unsigned old_hash, new_hash, idx; |
| |
| if (!s) |
| return -ENOENT; |
| |
| old_hash = bucket_hash(s, old_key); |
| idx = bucket_scan(s, old_hash, old_key); |
| if (idx == IDX_NIL) |
| return -ENOENT; |
| |
| new_hash = bucket_hash(s, new_key); |
| if (bucket_scan(s, new_hash, new_key) != IDX_NIL) |
| return -EEXIST; |
| |
| remove_entry(s, idx); |
| |
| e = &bucket_at_swap(&swap, IDX_PUT)->p.b; |
| e->key = new_key; |
| assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1); |
| |
| return 0; |
| } |
| |
| int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) { |
| struct swap_entries swap; |
| struct plain_hashmap_entry *e; |
| unsigned old_hash, new_hash, idx_old, idx_new; |
| |
| if (!h) |
| return -ENOENT; |
| |
| old_hash = bucket_hash(h, old_key); |
| idx_old = bucket_scan(h, old_hash, old_key); |
| if (idx_old == IDX_NIL) |
| return -ENOENT; |
| |
| old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key; |
| |
| new_hash = bucket_hash(h, new_key); |
| idx_new = bucket_scan(h, new_hash, new_key); |
| if (idx_new != IDX_NIL) |
| if (idx_old != idx_new) { |
| remove_entry(h, idx_new); |
| /* Compensate for a possible backward shift. */ |
| if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key) |
| idx_old = prev_idx(HASHMAP_BASE(h), idx_old); |
| assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key); |
| } |
| |
| remove_entry(h, idx_old); |
| |
| e = &bucket_at_swap(&swap, IDX_PUT)->p; |
| e->b.key = new_key; |
| e->value = value; |
| assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1); |
| |
| return 0; |
| } |
| |
| void* _hashmap_remove_value(HashmapBase *h, const void *key, void *value) { |
| struct hashmap_base_entry *e; |
| unsigned hash, idx; |
| |
| if (!h) |
| return NULL; |
| |
| hash = bucket_hash(h, key); |
| idx = bucket_scan(h, hash, key); |
| if (idx == IDX_NIL) |
| return NULL; |
| |
| e = bucket_at(h, idx); |
| if (entry_value(h, e) != value) |
| return NULL; |
| |
| remove_entry(h, idx); |
| |
| return value; |
| } |
| |
| static unsigned find_first_entry(HashmapBase *h) { |
| Iterator i = ITERATOR_FIRST; |
| |
| if (!h || !n_entries(h)) |
| return IDX_NIL; |
| |
| return hashmap_iterate_entry(h, &i); |
| } |
| |
| void* _hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) { |
| struct hashmap_base_entry *e; |
| void *key, *data; |
| unsigned idx; |
| |
| idx = find_first_entry(h); |
| if (idx == IDX_NIL) { |
| if (ret_key) |
| *ret_key = NULL; |
| return NULL; |
| } |
| |
| e = bucket_at(h, idx); |
| key = (void*) e->key; |
| data = entry_value(h, e); |
| |
| if (remove) |
| remove_entry(h, idx); |
| |
| if (ret_key) |
| *ret_key = key; |
| |
| return data; |
| } |
| |
| unsigned _hashmap_size(HashmapBase *h) { |
| if (!h) |
| return 0; |
| |
| return n_entries(h); |
| } |
| |
| unsigned _hashmap_buckets(HashmapBase *h) { |
| if (!h) |
| return 0; |
| |
| return n_buckets(h); |
| } |
| |
| int _hashmap_merge(Hashmap *h, Hashmap *other) { |
| Iterator i; |
| unsigned idx; |
| |
| assert(h); |
| |
| HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) { |
| struct plain_hashmap_entry *pe = plain_bucket_at(other, idx); |
| int r; |
| |
| r = hashmap_put(h, pe->b.key, pe->value); |
| if (r < 0 && r != -EEXIST) |
| return r; |
| } |
| |
| return 0; |
| } |
| |
| int set_merge(Set *s, Set *other) { |
| Iterator i; |
| unsigned idx; |
| |
| assert(s); |
| |
| HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) { |
| struct set_entry *se = set_bucket_at(other, idx); |
| int r; |
| |
| r = set_put(s, se->b.key); |
| if (r < 0) |
| return r; |
| } |
| |
| return 0; |
| } |
| |
| int _hashmap_reserve(HashmapBase *h, unsigned entries_add) { |
| int r; |
| |
| assert(h); |
| |
| r = resize_buckets(h, entries_add); |
| if (r < 0) |
| return r; |
| |
| return 0; |
| } |
| |
| /* |
| * The same as hashmap_merge(), but every new item from other is moved to h. |
| * Keys already in h are skipped and stay in other. |
| * Returns: 0 on success. |
| * -ENOMEM on alloc failure, in which case no move has been done. |
| */ |
| int _hashmap_move(HashmapBase *h, HashmapBase *other) { |
| struct swap_entries swap; |
| struct hashmap_base_entry *e, *n; |
| Iterator i; |
| unsigned idx; |
| int r; |
| |
| assert(h); |
| |
| if (!other) |
| return 0; |
| |
| assert(other->type == h->type); |
| |
| /* |
| * This reserves buckets for the worst case, where none of other's |
| * entries are yet present in h. This is preferable to risking |
| * an allocation failure in the middle of the moving and having to |
| * rollback or return a partial result. |
| */ |
| r = resize_buckets(h, n_entries(other)); |
| if (r < 0) |
| return r; |
| |
| HASHMAP_FOREACH_IDX(idx, other, i) { |
| unsigned h_hash; |
| |
| e = bucket_at(other, idx); |
| h_hash = bucket_hash(h, e->key); |
| if (bucket_scan(h, h_hash, e->key) != IDX_NIL) |
| continue; |
| |
| n = &bucket_at_swap(&swap, IDX_PUT)->p.b; |
| n->key = e->key; |
| if (h->type != HASHMAP_TYPE_SET) |
| ((struct plain_hashmap_entry*) n)->value = |
| ((struct plain_hashmap_entry*) e)->value; |
| assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1); |
| |
| remove_entry(other, idx); |
| } |
| |
| return 0; |
| } |
| |
| int _hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) { |
| struct swap_entries swap; |
| unsigned h_hash, other_hash, idx; |
| struct hashmap_base_entry *e, *n; |
| int r; |
| |
| assert(h); |
| |
| h_hash = bucket_hash(h, key); |
| if (bucket_scan(h, h_hash, key) != IDX_NIL) |
| return -EEXIST; |
| |
| if (!other) |
| return -ENOENT; |
| |
| assert(other->type == h->type); |
| |
| other_hash = bucket_hash(other, key); |
| idx = bucket_scan(other, other_hash, key); |
| if (idx == IDX_NIL) |
| return -ENOENT; |
| |
| e = bucket_at(other, idx); |
| |
| n = &bucket_at_swap(&swap, IDX_PUT)->p.b; |
| n->key = e->key; |
| if (h->type != HASHMAP_TYPE_SET) |
| ((struct plain_hashmap_entry*) n)->value = |
| ((struct plain_hashmap_entry*) e)->value; |
| r = hashmap_put_boldly(h, h_hash, &swap, true); |
| if (r < 0) |
| return r; |
| |
| remove_entry(other, idx); |
| return 0; |
| } |
| |
| HashmapBase* _hashmap_copy(HashmapBase *h HASHMAP_DEBUG_PARAMS) { |
| HashmapBase *copy; |
| int r; |
| |
| assert(h); |
| |
| copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_PASS_ARGS); |
| if (!copy) |
| return NULL; |
| |
| switch (h->type) { |
| case HASHMAP_TYPE_PLAIN: |
| case HASHMAP_TYPE_ORDERED: |
| r = hashmap_merge((Hashmap*)copy, (Hashmap*)h); |
| break; |
| case HASHMAP_TYPE_SET: |
| r = set_merge((Set*)copy, (Set*)h); |
| break; |
| default: |
| assert_not_reached("Unknown hashmap type"); |
| } |
| |
| if (r < 0) |
                return _hashmap_free(copy, NULL, NULL);
| |
| return copy; |
| } |
| |
| char** _hashmap_get_strv(HashmapBase *h) { |
| char **sv; |
| Iterator i; |
| unsigned idx, n; |
| |
| if (!h) |
| return new0(char*, 1); |
| |
| sv = new(char*, n_entries(h)+1); |
| if (!sv) |
| return NULL; |
| |
| n = 0; |
| HASHMAP_FOREACH_IDX(idx, h, i) |
| sv[n++] = entry_value(h, bucket_at(h, idx)); |
| sv[n] = NULL; |
| |
| return sv; |
| } |
| |
| void* ordered_hashmap_next(OrderedHashmap *h, const void *key) { |
| struct ordered_hashmap_entry *e; |
| unsigned hash, idx; |
| |
| if (!h) |
| return NULL; |
| |
| hash = bucket_hash(h, key); |
| idx = bucket_scan(h, hash, key); |
| if (idx == IDX_NIL) |
| return NULL; |
| |
| e = ordered_bucket_at(h, idx); |
| if (e->iterate_next == IDX_NIL) |
| return NULL; |
| return ordered_bucket_at(h, e->iterate_next)->p.value; |
| } |
| |
| int set_consume(Set *s, void *value) { |
| int r; |
| |
| assert(s); |
| assert(value); |
| |
| r = set_put(s, value); |
| if (r <= 0) |
| free(value); |
| |
| return r; |
| } |
| |
| int _hashmap_put_strdup_full(Hashmap **h, const struct hash_ops *hash_ops, const char *k, const char *v HASHMAP_DEBUG_PARAMS) { |
| int r; |
| |
| r = _hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS); |
| if (r < 0) |
| return r; |
| |
| _cleanup_free_ char *kdup = NULL, *vdup = NULL; |
| |
| kdup = strdup(k); |
| if (!kdup) |
| return -ENOMEM; |
| |
| if (v) { |
| vdup = strdup(v); |
| if (!vdup) |
| return -ENOMEM; |
| } |
| |
| r = hashmap_put(*h, kdup, vdup); |
| if (r < 0) { |
| if (r == -EEXIST && streq_ptr(v, hashmap_get(*h, kdup))) |
| return 0; |
| return r; |
| } |
| |
| /* 0 with non-null vdup would mean vdup is already in the hashmap, which cannot be */ |
| assert(vdup == NULL || r > 0); |
| if (r > 0) |
| kdup = vdup = NULL; |
| |
| return r; |
| } |
| |
| int _set_put_strdup_full(Set **s, const struct hash_ops *hash_ops, const char *p HASHMAP_DEBUG_PARAMS) { |
| char *c; |
| int r; |
| |
| assert(s); |
| assert(p); |
| |
| r = _set_ensure_allocated(s, hash_ops HASHMAP_DEBUG_PASS_ARGS); |
| if (r < 0) |
| return r; |
| |
| if (set_contains(*s, (char*) p)) |
| return 0; |
| |
| c = strdup(p); |
| if (!c) |
| return -ENOMEM; |
| |
| return set_consume(*s, c); |
| } |
| |
| int _set_put_strdupv_full(Set **s, const struct hash_ops *hash_ops, char **l HASHMAP_DEBUG_PARAMS) { |
| int n = 0, r; |
| char **i; |
| |
| assert(s); |
| |
| STRV_FOREACH(i, l) { |
| r = _set_put_strdup_full(s, hash_ops, *i HASHMAP_DEBUG_PASS_ARGS); |
| if (r < 0) |
| return r; |
| |
| n += r; |
| } |
| |
| return n; |
| } |
| |
| int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) { |
| const char *p = v; |
| int r; |
| |
| assert(s); |
| assert(v); |
| |
| for (;;) { |
| char *word; |
| |
| r = extract_first_word(&p, &word, separators, flags); |
| if (r <= 0) |
| return r; |
| |
| r = set_consume(s, word); |
| if (r < 0) |
| return r; |
| } |
| } |
| |
/* Expand the cachemem if needed; returns true if newly (re)activated. */
| static int cachemem_maintain(CacheMem *mem, size_t size) { |
| assert(mem); |
| |
| if (!GREEDY_REALLOC(mem->ptr, size)) { |
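                /* GREEDY_REALLOC() returns NULL not only on allocation failure, but also
                 * when nothing has been allocated yet and zero bytes are requested, hence
                 * only treat this as an error if we actually asked for something. */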
| if (size > 0) |
| return -ENOMEM; |
| } |
| |
| if (!mem->active) { |
| mem->active = true; |
| return true; |
| } |
| |
| return false; |
| } |
| |
| int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) { |
| bool sync_keys = false, sync_values = false; |
| size_t size; |
| int r; |
| |
| assert(cache); |
| assert(cache->hashmap); |
| |
| size = n_entries(cache->hashmap); |
| |
| if (res_keys) { |
| r = cachemem_maintain(&cache->keys, size); |
| if (r < 0) |
| return r; |
| |
| sync_keys = r; |
| } else |
| cache->keys.active = false; |
| |
| if (res_values) { |
| r = cachemem_maintain(&cache->values, size); |
| if (r < 0) |
| return r; |
| |
| sync_values = r; |
| } else |
| cache->values.active = false; |
| |
| if (cache->hashmap->dirty) { |
| if (cache->keys.active) |
| sync_keys = true; |
| if (cache->values.active) |
| sync_values = true; |
| |
| cache->hashmap->dirty = false; |
| } |
| |
| if (sync_keys || sync_values) { |
| unsigned i, idx; |
| Iterator iter; |
| |
| i = 0; |
| HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) { |
| struct hashmap_base_entry *e; |
| |
| e = bucket_at(cache->hashmap, idx); |
| |
| if (sync_keys) |
| cache->keys.ptr[i] = e->key; |
| if (sync_values) |
| cache->values.ptr[i] = entry_value(cache->hashmap, e); |
| i++; |
| } |
| } |
| |
| if (res_keys) |
| *res_keys = cache->keys.ptr; |
| if (res_values) |
| *res_values = cache->values.ptr; |
| if (res_n_entries) |
| *res_n_entries = size; |
| |
| return 0; |
| } |
| |
| IteratedCache* iterated_cache_free(IteratedCache *cache) { |
| if (cache) { |
| free(cache->keys.ptr); |
| free(cache->values.ptr); |
| } |
| |
| return mfree(cache); |
| } |
| |
| int set_strjoin(Set *s, const char *separator, bool wrap_with_separator, char **ret) { |
| _cleanup_free_ char *str = NULL; |
| size_t separator_len, len = 0; |
| const char *value; |
| bool first; |
| |
| assert(ret); |
| |
| if (set_isempty(s)) { |
| *ret = NULL; |
| return 0; |
| } |
| |
| separator_len = strlen_ptr(separator); |
| |
| if (separator_len == 0) |
| wrap_with_separator = false; |
| |
| first = !wrap_with_separator; |
| |
| SET_FOREACH(value, s) { |
| size_t l = strlen_ptr(value); |
| |
| if (l == 0) |
| continue; |
| |
| if (!GREEDY_REALLOC(str, len + l + (first ? 0 : separator_len) + (wrap_with_separator ? separator_len : 0) + 1)) |
| return -ENOMEM; |
| |
| if (separator_len > 0 && !first) { |
| memcpy(str + len, separator, separator_len); |
| len += separator_len; |
| } |
| |
| memcpy(str + len, value, l); |
| len += l; |
| first = false; |
| } |
| |
| if (wrap_with_separator) { |
| memcpy(str + len, separator, separator_len); |
| len += separator_len; |
| } |
| |
| str[len] = '\0'; |
| |
| *ret = TAKE_PTR(str); |
| return 0; |
| } |
| |
| bool set_equal(Set *a, Set *b) { |
| void *p; |
| |
| /* Checks whether each entry of 'a' is also in 'b' and vice versa, i.e. the two sets contain the same |
| * entries */ |
| |
| if (a == b) |
| return true; |
| |
| if (set_isempty(a) && set_isempty(b)) |
| return true; |
| |
| if (set_size(a) != set_size(b)) /* Cheap check that hopefully catches a lot of inequality cases |
| * already */ |
| return false; |
| |
| SET_FOREACH(p, a) |
| if (!set_contains(b, p)) |
| return false; |
| |
| /* If we have the same hashops, then we don't need to check things backwards given we compared the |
| * size and that all of a is in b. */ |
| if (a->b.hash_ops == b->b.hash_ops) |
| return true; |
| |
| SET_FOREACH(p, b) |
| if (!set_contains(a, p)) |
| return false; |
| |
| return true; |
| } |