| /* Simple NUMA library. |
| Copyright (C) 2003,2004,2005,2008 Andi Kleen, SuSE Labs and |
| Cliff Wickman, SGI. |
| |
| libnuma is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Lesser General Public |
| License as published by the Free Software Foundation; version |
| 2.1. |
| |
| libnuma is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should find a copy of v2.1 of the GNU Lesser General Public License |
| somewhere on your Linux system; if not, write to the Free Software |
| Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| |
| All calls are undefined when numa_available returns an error. */ |
| #define _GNU_SOURCE 1 |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <unistd.h> |
| #include <string.h> |
| #include <sched.h> |
| #include <dirent.h> |
| #include <errno.h> |
| #include <stdarg.h> |
| #include <ctype.h> |
| |
| #include <sys/mman.h> |
| #include <limits.h> |
| |
| #ifdef MEMORY_SANITIZER |
| #include <sanitizer/msan_interface.h> |
| #endif |
| |
| #include "config.h" |
| #include "numa.h" |
| #include "numaif.h" |
| #include "numaint.h" |
| #include "util.h" |
| #include "affinity.h" |
| |
| #define WEAK __attribute__((weak)) |
| |
| #define CPU_BUFFER_SIZE 4096 /* This limits you to 32768 CPUs */ |
| |
| /* these are the old (version 1) masks */ |
| nodemask_t numa_no_nodes; |
| nodemask_t numa_all_nodes; |
| /* these are now the default bitmask (pointers to) (version 2) */ |
| struct bitmask *numa_no_nodes_ptr = NULL; |
| struct bitmask *numa_all_nodes_ptr = NULL; |
| struct bitmask *numa_possible_nodes_ptr = NULL; |
| struct bitmask *numa_all_cpus_ptr = NULL; |
| struct bitmask *numa_possible_cpus_ptr = NULL; |
| /* I would prefer to use symbol versioning to create v1 and v2 versions |
| of numa_no_nodes and numa_all_nodes, but the loader does not correctly |
| handle versioning of BSS versus small data items */ |
| |
| struct bitmask *numa_nodes_ptr = NULL; |
| static struct bitmask *numa_memnode_ptr = NULL; |
| static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES]; |
| static struct bitmask **node_cpu_mask_v2; |
| |
| WEAK void numa_error(const char *where); |
| |
| #ifndef TLS |
| #warning "not threadsafe" |
| #define __thread |
| #endif |
| |
| static __thread int bind_policy = MPOL_BIND; |
| static __thread unsigned int mbind_flags = 0; |
| static int sizes_set=0; |
| static int maxconfigurednode = -1; |
| static int maxconfiguredcpu = -1; |
| static int numprocnode = -1; |
| static int numproccpu = -1; |
| static int nodemask_sz = 0; |
| static int cpumask_sz = 0; |
| |
| int numa_exit_on_error = 0; |
| int numa_exit_on_warn = 0; |
| static void set_sizes(void); |
| |
| /* |
| * numa_init() below is marked as a constructor, so the dynamic loader |
| * calls it automatically when the library is loaded (numa_fini() is |
| * the matching destructor, run when the library is unloaded). |
| * |
| * The v1 library depends upon nodemask_t's of all nodes and no nodes. |
| */ |
| void __attribute__((constructor)) |
| numa_init(void) |
| { |
| int max,i; |
| |
| if (sizes_set) |
| return; |
| |
| set_sizes(); |
| /* numa_all_nodes should represent existing nodes on this system */ |
| max = numa_num_configured_nodes(); |
| for (i = 0; i < max; i++) |
| nodemask_set_compat((nodemask_t *)&numa_all_nodes, i); |
| memset(&numa_no_nodes, 0, sizeof(numa_no_nodes)); |
| } |
| |
| #define FREE_AND_ZERO(x) do { \ |
| if (x) { \ |
| numa_bitmask_free(x); \ |
| x = NULL; \ |
| } \ |
| } while (0) |
| |
| void __attribute__((destructor)) |
| numa_fini(void) |
| { |
| FREE_AND_ZERO(numa_all_cpus_ptr); |
| FREE_AND_ZERO(numa_possible_cpus_ptr); |
| FREE_AND_ZERO(numa_all_nodes_ptr); |
| FREE_AND_ZERO(numa_possible_nodes_ptr); |
| FREE_AND_ZERO(numa_no_nodes_ptr); |
| FREE_AND_ZERO(numa_memnode_ptr); |
| FREE_AND_ZERO(numa_nodes_ptr); |
| } |
| |
| /* |
| * The following bitmask declarations, bitmask_*() routines, and associated |
| * _setbit() and _getbit() routines are: |
| * Copyright (c) 2004-2007 Silicon Graphics, Inc. (SGI) All rights reserved. |
| * SGI publishes it under the terms of the GNU General Public License, v2, |
| * as published by the Free Software Foundation. |
| */ |
| static unsigned int |
| _getbit(const struct bitmask *bmp, unsigned int n) |
| { |
| if (n < bmp->size) |
| return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1; |
| else |
| return 0; |
| } |
| |
| static void |
| _setbit(struct bitmask *bmp, unsigned int n, unsigned int v) |
| { |
| if (n < bmp->size) { |
| if (v) |
| bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong); |
| else |
| bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong)); |
| } |
| } |
| |
| int |
| numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i) |
| { |
| return _getbit(bmp, i); |
| } |
| |
| struct bitmask * |
| numa_bitmask_setall(struct bitmask *bmp) |
| { |
| unsigned int i; |
| for (i = 0; i < bmp->size; i++) |
| _setbit(bmp, i, 1); |
| return bmp; |
| } |
| |
| struct bitmask * |
| numa_bitmask_clearall(struct bitmask *bmp) |
| { |
| unsigned int i; |
| for (i = 0; i < bmp->size; i++) |
| _setbit(bmp, i, 0); |
| return bmp; |
| } |
| |
| struct bitmask * |
| numa_bitmask_setbit(struct bitmask *bmp, unsigned int i) |
| { |
| _setbit(bmp, i, 1); |
| return bmp; |
| } |
| |
| struct bitmask * |
| numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i) |
| { |
| _setbit(bmp, i, 0); |
| return bmp; |
| } |
| |
| unsigned int |
| numa_bitmask_nbytes(struct bitmask *bmp) |
| { |
| return longsperbits(bmp->size) * sizeof(unsigned long); |
| } |
| |
| /* where n is the number of bits in the map */ |
| /* This function should not exit on failure, but right now we cannot really |
| recover from this. */ |
| struct bitmask * |
| numa_bitmask_alloc(unsigned int n) |
| { |
| struct bitmask *bmp; |
| |
| if (n < 1) { |
| errno = EINVAL; |
| numa_error("request to allocate mask for invalid number"); |
| exit(1); |
| } |
| bmp = malloc(sizeof(*bmp)); |
| if (bmp == 0) |
| goto oom; |
| bmp->size = n; |
| bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long)); |
| if (bmp->maskp == 0) { |
| free(bmp); |
| goto oom; |
| } |
| return bmp; |
| |
| oom: |
| numa_error("Out of memory allocating bitmask"); |
| exit(1); |
| } |
| |
| void |
| numa_bitmask_free(struct bitmask *bmp) |
| { |
| if (bmp == 0) |
| return; |
| free(bmp->maskp); |
| bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */ |
| free(bmp); |
| return; |
| } |
| |
| /* True if two bitmasks are equal */ |
| int |
| numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2) |
| { |
| unsigned int i; |
| for (i = 0; i < bmp1->size || i < bmp2->size; i++) |
| if (_getbit(bmp1, i) != _getbit(bmp2, i)) |
| return 0; |
| return 1; |
| } |
| |
| /* Hamming Weight: number of set bits */ |
| unsigned int numa_bitmask_weight(const struct bitmask *bmp) |
| { |
| unsigned int i; |
| unsigned int w = 0; |
| for (i = 0; i < bmp->size; i++) |
| if (_getbit(bmp, i)) |
| w++; |
| return w; |
| } |
| |
| /* *****end of bitmask_ routines ************ */ |
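| |
| /* |
| * Usage sketch for the bitmask_ routines above (illustrative only, |
| * not part of the library): |
| * |
| *     struct bitmask *bmp = numa_bitmask_alloc(numa_num_possible_nodes()); |
| *     numa_bitmask_setbit(bmp, 0); |
| *     numa_bitmask_setbit(bmp, 2); |
| *     assert(numa_bitmask_weight(bmp) == 2); |
| *     numa_bitmask_free(bmp); |
| */ |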
| |
| /* The next two handlers can be overridden by the application for different error handling */ |
| WEAK void numa_error(const char *where) |
| { |
| int olde = errno; |
| perror(where); |
| if (numa_exit_on_error) |
| exit(1); |
| errno = olde; |
| } |
| |
| WEAK void numa_warn(int num, const char *fmt, ...) |
| { |
| static unsigned warned; |
| va_list ap; |
| int olde = errno; |
| |
| /* Give each warning only once */ |
| if ((1<<num) & warned) |
| return; |
| warned |= (1<<num); |
| |
| va_start(ap,fmt); |
| fprintf(stderr, "libnuma: Warning: "); |
| vfprintf(stderr, fmt, ap); |
| fputc('\n', stderr); |
| va_end(ap); |
| |
| errno = olde; |
| } |
| |
| static void setpol(int policy, struct bitmask *bmp) |
| { |
| if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0) |
| numa_error("set_mempolicy"); |
| } |
| |
| static void getpol(int *oldpolicy, struct bitmask *bmp) |
| { |
| if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0) |
| numa_error("get_mempolicy"); |
| } |
| |
| static void dombind(void *mem, size_t size, int pol, struct bitmask *bmp) |
| { |
| if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0, |
| mbind_flags) < 0) |
| numa_error("mbind"); |
| } |
| |
| /* (undocumented) */ |
| /* gives the wrong answer for hugetlbfs mappings. */ |
| int numa_pagesize(void) |
| { |
| static int pagesize; |
| if (pagesize > 0) |
| return pagesize; |
| pagesize = getpagesize(); |
| return pagesize; |
| } |
| |
| make_internal_alias(numa_pagesize); |
| |
| /* |
| * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr) |
| * and the highest numbered existing node (maxconfigurednode). |
| */ |
| static void |
| set_configured_nodes(void) |
| { |
| DIR *d; |
| struct dirent *de; |
| long long freep; |
| |
| numa_memnode_ptr = numa_allocate_nodemask(); |
| numa_nodes_ptr = numa_allocate_nodemask(); |
| |
| d = opendir("/sys/devices/system/node"); |
| if (!d) { |
| maxconfigurednode = 0; |
| } else { |
| while ((de = readdir(d)) != NULL) { |
| int nd; |
| if (strncmp(de->d_name, "node", 4)) |
| continue; |
| nd = strtoul(de->d_name+4, NULL, 0); |
| numa_bitmask_setbit(numa_nodes_ptr, nd); |
| if (numa_node_size64(nd, &freep) > 0) |
| numa_bitmask_setbit(numa_memnode_ptr, nd); |
| if (maxconfigurednode < nd) |
| maxconfigurednode = nd; |
| } |
| closedir(d); |
| } |
| } |
| |
| /* |
| * Convert the string length of an ascii hex mask to the number |
| * of bits represented by that mask. |
| */ |
| static int s2nbits(const char *s) |
| { |
| return strlen(s) * 32 / 9; |
| } |
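| |
| /* |
| * For example, the mask field "00000000,00000001\n" (each 32 bit word |
| * is 8 hex digits plus a comma or newline, i.e. 9 characters per word) |
| * has strlen() 18, giving 18 * 32 / 9 = 64 bits. |
| */ |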
| |
| /* Is string 'pre' a prefix of string 's'? */ |
| static int strprefix(const char *s, const char *pre) |
| { |
| return strncmp(s, pre, strlen(pre)) == 0; |
| } |
| |
| static const char *mask_size_file = "/proc/self/status"; |
| static const char *nodemask_prefix = "Mems_allowed:\t"; |
| /* |
| * (do this the way Paul Jackson's libcpuset does it) |
| * The nodemask values in /proc/self/status are in an |
| * ascii format that uses 9 characters for each 32 bits of mask. |
| * (this could also be used to find the cpumask size) |
| */ |
| static void |
| set_nodemask_size(void) |
| { |
| FILE *fp; |
| char *buf = NULL; |
| size_t bufsize = 0; |
| |
| if ((fp = fopen(mask_size_file, "r")) == NULL) |
| goto done; |
| |
| while (getline(&buf, &bufsize, fp) > 0) { |
| #ifdef MEMORY_SANITIZER |
| __msan_unpoison_string(buf); |
| #endif |
| if (strprefix(buf, nodemask_prefix)) { |
| nodemask_sz = s2nbits(buf + strlen(nodemask_prefix)); |
| break; |
| } |
| } |
| free(buf); |
| fclose(fp); |
| done: |
| if (nodemask_sz == 0) {/* fall back on error */ |
| int pol; |
| unsigned long *mask = NULL, *tmp; |
| nodemask_sz = 16; |
| do { |
| nodemask_sz <<= 1; |
| /* use a temporary so the old buffer is not leaked on failure */ |
| tmp = realloc(mask, nodemask_sz / 8); |
| if (!tmp) { |
| free(mask); |
| return; |
| } |
| mask = tmp; |
| } while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL && |
| nodemask_sz < 4096*8); |
| free(mask); |
| } |
| } |
| |
| /* |
| * Read a mask consisting of a sequence of hexadecimal longs separated by |
| * commas. Order them correctly and return the number of bits set. |
| */ |
| static int |
| read_mask(char *s, struct bitmask *bmp) |
| { |
| char *end = s; |
| int tmplen = (bmp->size + bitsperint - 1) / bitsperint; |
| unsigned int tmp[tmplen]; |
| unsigned int *start = tmp; |
| unsigned int i, n = 0, m = 0; |
| |
| if (!s) |
| return 0; /* shouldn't happen */ |
| |
| i = strtoul(s, &end, 16); |
| |
| /* Skip leading zeros */ |
| while (!i && *end++ == ',') { |
| i = strtoul(end, &end, 16); |
| } |
| |
| if (!i) |
| /* End of string. No mask */ |
| return -1; |
| |
| start[n++] = i; |
| /* Read sequence of ints */ |
| while (*end++ == ',') { |
| i = strtoul(end, &end, 16); |
| |
| /* check before storing to avoid overflowing tmp[] */ |
| if (n >= tmplen) |
| return -1; |
| start[n++] = i; |
| } |
| |
| /* |
| * Reverse the sequence of ints: the first int read is the most |
| * significant, but it was stored first. Combine pairs of 32 bit |
| * ints into longs in an endian-safe way while doing so. |
| */ |
| while (n) { |
| int w; |
| unsigned long x = 0; |
| /* read into long values in an endian-safe way */ |
| for (w = 0; n && w < bitsperlong; w += bitsperint) |
| x |= ((unsigned long)start[n-- - 1] << w); |
| |
| bmp->maskp[m++] = x; |
| } |
| /* |
| * Return the number of bits set |
| */ |
| return numa_bitmask_weight(bmp); |
| } |
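| |
| /* |
| * Worked example (on a 64 bit system): for the string |
| * "0000000f,ffffffff" the words 0xf and 0xffffffff are read in that |
| * order and recombined into maskp[0] = 0x0000000fffffffff; the |
| * returned weight is 36. |
| */ |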
| |
| /* |
| * Read a process's constraints in terms of nodes and cpus from |
| * /proc/self/status. |
| */ |
| static void |
| set_task_constraints(void) |
| { |
| int hicpu = maxconfiguredcpu; |
| int i; |
| char *buffer = NULL; |
| size_t buflen = 0; |
| FILE *f; |
| |
| numa_all_cpus_ptr = numa_allocate_cpumask(); |
| numa_possible_cpus_ptr = numa_allocate_cpumask(); |
| numa_all_nodes_ptr = numa_allocate_nodemask(); |
| numa_possible_nodes_ptr = numa_allocate_nodemask(); |
| numa_no_nodes_ptr = numa_allocate_nodemask(); |
| |
| f = fopen(mask_size_file, "r"); |
| if (!f) { |
| //numa_warn(W_cpumap, "Cannot parse %s", mask_size_file); |
| return; |
| } |
| |
| while (getline(&buffer, &buflen, f) > 0) { |
| #ifdef MEMORY_SANITIZER |
| __msan_unpoison_string(buffer); |
| #endif |
| /* mask starts after [last] tab; skip malformed lines */ |
| char *mask = strrchr(buffer,'\t'); |
| if (!mask) |
| continue; |
| mask++; |
| |
| if (strncmp(buffer,"Cpus_allowed:",13) == 0) |
| numproccpu = read_mask(mask, numa_all_cpus_ptr); |
| |
| if (strncmp(buffer,"Mems_allowed:",13) == 0) { |
| numprocnode = read_mask(mask, numa_all_nodes_ptr); |
| } |
| } |
| fclose(f); |
| free(buffer); |
| |
| for (i = 0; i <= hicpu; i++) |
| numa_bitmask_setbit(numa_possible_cpus_ptr, i); |
| for (i = 0; i <= maxconfigurednode; i++) |
| numa_bitmask_setbit(numa_possible_nodes_ptr, i); |
| |
| /* |
| * Cpus_allowed in the kernel may be set to all f's, |
| * i.e. it may be a superset of the actually available processors. |
| * In that case reduce numproccpu to the number of actually |
| * available cpus. |
| */ |
| if (numproccpu <= 0) { |
| for (i = 0; i <= hicpu; i++) |
| numa_bitmask_setbit(numa_all_cpus_ptr, i); |
| numproccpu = hicpu+1; |
| } |
| |
| if (numproccpu > hicpu+1) { |
| numproccpu = hicpu+1; |
| for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) { |
| numa_bitmask_clearbit(numa_all_cpus_ptr, i); |
| } |
| } |
| |
| if (numprocnode <= 0) { |
| for (i = 0; i <= maxconfigurednode; i++) |
| numa_bitmask_setbit(numa_all_nodes_ptr, i); |
| numprocnode = maxconfigurednode + 1; |
| } |
| |
| return; |
| } |
| |
| /* |
| * Find the highest cpu number possible (in other words the size |
| * of a kernel cpumask_t (in bits) - 1) |
| */ |
| static void |
| set_numa_max_cpu(void) |
| { |
| int len = 4096; |
| int n; |
| int olde = errno; |
| struct bitmask *buffer; |
| |
| do { |
| buffer = numa_bitmask_alloc(len); |
| n = numa_sched_getaffinity_v2_int(0, buffer); |
| /* on success, returns size of kernel cpumask_t, in bytes */ |
| if (n < 0) { |
| if (errno == EINVAL) { |
| if (len >= 1024*1024) { |
| /* give up; fall back to the same guess as the error path */ |
| n = sizeof(cpu_set_t); |
| break; |
| } |
| len *= 2; |
| numa_bitmask_free(buffer); |
| continue; |
| } else { |
| numa_warn(W_numcpus, "Unable to determine max cpu" |
| " (sched_getaffinity: %s); guessing...", |
| strerror(errno)); |
| n = sizeof(cpu_set_t); |
| break; |
| } |
| } |
| } while (n < 0); |
| numa_bitmask_free(buffer); |
| errno = olde; |
| cpumask_sz = n*8; |
| } |
| |
| /* |
| * get the total (configured) number of cpus - both online and offline |
| */ |
| static void |
| set_configured_cpus(void) |
| { |
| long ncpus = sysconf(_SC_NPROCESSORS_CONF); |
| |
| /* test sysconf's return directly; -1 - 1 would evade a later check */ |
| if (ncpus == -1) |
| numa_error("sysconf(_SC_NPROCESSORS_CONF) failed"); |
| maxconfiguredcpu = ncpus - 1; |
| } |
| |
| /* |
| * Initialize all the sizes. |
| */ |
| static void |
| set_sizes(void) |
| { |
| sizes_set++; |
| set_nodemask_size(); /* size of kernel nodemask_t */ |
| set_configured_nodes(); /* configured nodes listed in /sys */ |
| set_numa_max_cpu(); /* size of kernel cpumask_t */ |
| set_configured_cpus(); /* cpus listed in /sys/devices/system/cpu */ |
| set_task_constraints(); /* cpus and nodes for current task */ |
| } |
| |
| int |
| numa_num_configured_nodes(void) |
| { |
| /* |
| * NOTE: this function's behavior matches the documentation (i.e. it |
| * returns a count of nodes with memory) despite the poor function |
| * naming. We also cannot use the similarly poorly named |
| * numa_all_nodes_ptr as it only tracks nodes with memory from which |
| * the calling process can allocate. Think sparse nodes, memory-less |
| * nodes, cpusets... |
| */ |
| int memnodecount=0, i; |
| |
| for (i=0; i <= maxconfigurednode; i++) { |
| if (numa_bitmask_isbitset(numa_memnode_ptr, i)) |
| memnodecount++; |
| } |
| return memnodecount; |
| } |
| |
| int |
| numa_num_configured_cpus(void) |
| { |
| return maxconfiguredcpu+1; |
| } |
| |
| int |
| numa_num_possible_nodes(void) |
| { |
| return nodemask_sz; |
| } |
| |
| int |
| numa_num_possible_cpus(void) |
| { |
| return cpumask_sz; |
| } |
| |
| int |
| numa_num_task_nodes(void) |
| { |
| return numprocnode; |
| } |
| |
| /* |
| * for backward compatibility |
| */ |
| int |
| numa_num_thread_nodes(void) |
| { |
| return numa_num_task_nodes(); |
| } |
| |
| int |
| numa_num_task_cpus(void) |
| { |
| return numproccpu; |
| } |
| |
| /* |
| * for backward compatibility |
| */ |
| int |
| numa_num_thread_cpus(void) |
| { |
| return numa_num_task_cpus(); |
| } |
| |
| /* |
| * Return the number of the highest node in this running system. |
| */ |
| int |
| numa_max_node(void) |
| { |
| return maxconfigurednode; |
| } |
| |
| make_internal_alias(numa_max_node); |
| |
| /* |
| * Return the number of the highest possible node in a system, |
| * which for v1 is the size of a numa.h nodemask_t (in bits) - 1, |
| * but for v2 is the size of a kernel nodemask_t (in bits) - 1. |
| */ |
| int |
| numa_max_possible_node_v1(void) |
| { |
| return ((sizeof(nodemask_t)*8)-1); |
| } |
| backward_symver(numa_max_possible_node_v1,numa_max_possible_node); |
| |
| int |
| numa_max_possible_node_v2(void) |
| { |
| return numa_num_possible_nodes()-1; |
| } |
| symver(numa_max_possible_node_v2,numa_max_possible_node); |
| |
| make_internal_alias(numa_max_possible_node_v1); |
| make_internal_alias(numa_max_possible_node_v2); |
| |
| /* |
| * Allocate a bitmask for cpus, of a size large enough to |
| * match the kernel's cpumask_t. |
| */ |
| struct bitmask * |
| numa_allocate_cpumask(void) |
| { |
| int ncpus = numa_num_possible_cpus(); |
| |
| return numa_bitmask_alloc(ncpus); |
| } |
| |
| /* |
| * Allocate a bitmask the size of a libnuma nodemask_t |
| */ |
| static struct bitmask * |
| allocate_nodemask_v1(void) |
| { |
| int nnodes = numa_max_possible_node_v1_int()+1; |
| |
| return numa_bitmask_alloc(nnodes); |
| } |
| |
| /* |
| * Allocate a bitmask for nodes, of a size large enough to |
| * match the kernel's nodemask_t. |
| */ |
| struct bitmask * |
| numa_allocate_nodemask(void) |
| { |
| struct bitmask *bmp; |
| int nnodes = numa_max_possible_node_v2_int() + 1; |
| |
| bmp = numa_bitmask_alloc(nnodes); |
| return bmp; |
| } |
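| |
| /* |
| * Usage sketch (illustrative only): masks returned by the allocators |
| * above must be released with numa_bitmask_free(): |
| * |
| *     struct bitmask *nodes = numa_allocate_nodemask(); |
| *     numa_bitmask_setbit(nodes, 0); |
| *     numa_set_membind(nodes);        (bind memory to node 0) |
| *     numa_bitmask_free(nodes); |
| */ |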
| |
| /* (cache the result?) */ |
| long long numa_node_size64(int node, long long *freep) |
| { |
| size_t len = 0; |
| char *line = NULL; |
| long long size = -1; |
| FILE *f; |
| char fn[64]; |
| int ok = 0; |
| int required = freep ? 2 : 1; |
| |
| if (freep) |
| *freep = -1; |
| sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node); |
| f = fopen(fn, "r"); |
| if (!f) |
| return -1; |
| while (getdelim(&line, &len, '\n', f) > 0) { |
| char *end; |
| char *s = strcasestr(line, "kB"); |
| if (!s) |
| continue; |
| --s; |
| while (s > line && isspace(*s)) |
| --s; |
| while (s > line && isdigit(*s)) |
| --s; |
| if (strstr(line, "MemTotal")) { |
| size = strtoull(s,&end,0) << 10; |
| if (end == s) |
| size = -1; |
| else |
| ok++; |
| } |
| if (freep && strstr(line, "MemFree")) { |
| *freep = strtoull(s,&end,0) << 10; |
| if (end == s) |
| *freep = -1; |
| else |
| ok++; |
| } |
| } |
| fclose(f); |
| free(line); |
| if (ok != required) |
| numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok); |
| return size; |
| } |
| |
| make_internal_alias(numa_node_size64); |
| |
| long numa_node_size(int node, long *freep) |
| { |
| long long f2; |
| long sz = numa_node_size64_int(node, &f2); |
| if (freep) |
| *freep = f2; |
| return sz; |
| } |
| |
| int numa_available(void) |
| { |
| if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS) |
| return -1; |
| return 0; |
| } |
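| |
| /* |
| * Usage sketch (illustrative only): as the header comment notes, all |
| * other calls are undefined unless this returns >= 0: |
| * |
| *     if (numa_available() < 0) { |
| *             fprintf(stderr, "NUMA not supported on this system\n"); |
| *             exit(1); |
| *     } |
| */ |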
| |
| void |
| numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask) |
| { |
| struct bitmask bitmask; |
| |
| bitmask.size = sizeof(nodemask_t) * 8; |
| bitmask.maskp = (unsigned long *)mask; |
| dombind(mem, size, MPOL_INTERLEAVE, &bitmask); |
| } |
| backward_symver(numa_interleave_memory_v1,numa_interleave_memory); |
| |
| void |
| numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp) |
| { |
| dombind(mem, size, MPOL_INTERLEAVE, bmp); |
| } |
| symver(numa_interleave_memory_v2,numa_interleave_memory); |
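| |
| /* |
| * Usage sketch (illustrative only): interleave an anonymous mapping |
| * across all nodes the task may allocate on: |
| * |
| *     void *mem = mmap(NULL, len, PROT_READ|PROT_WRITE, |
| *                      MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); |
| *     if (mem != MAP_FAILED) |
| *             numa_interleave_memory(mem, len, numa_all_nodes_ptr); |
| */ |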
| |
| void numa_tonode_memory(void *mem, size_t size, int node) |
| { |
| struct bitmask *nodes; |
| |
| nodes = numa_allocate_nodemask(); |
| numa_bitmask_setbit(nodes, node); |
| dombind(mem, size, bind_policy, nodes); |
| numa_bitmask_free(nodes); |
| } |
| |
| void |
| numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask) |
| { |
| struct bitmask bitmask; |
| |
| bitmask.maskp = (unsigned long *)mask; |
| bitmask.size = sizeof(nodemask_t) * 8; /* bits, not bytes */ |
| dombind(mem, size, bind_policy, &bitmask); |
| } |
| backward_symver(numa_tonodemask_memory_v1,numa_tonodemask_memory); |
| |
| void |
| numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp) |
| { |
| dombind(mem, size, bind_policy, bmp); |
| } |
| symver(numa_tonodemask_memory_v2,numa_tonodemask_memory); |
| |
| void numa_setlocal_memory(void *mem, size_t size) |
| { |
| dombind(mem, size, MPOL_PREFERRED, NULL); |
| } |
| |
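| /* |
| * Touch every page of the region once so the pages are actually |
| * allocated (and placed) under the currently effective memory policy. |
| * The volatile access keeps the compiler from optimizing the |
| * read-modify-write away. |
| */ |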
| void numa_police_memory(void *mem, size_t size) |
| { |
| int pagesize = numa_pagesize_int(); |
| unsigned long i; |
| for (i = 0; i < size; i += pagesize) |
| ((volatile char*)mem)[i] = ((volatile char*)mem)[i]; |
| } |
| |
| make_internal_alias(numa_police_memory); |
| |
| void *numa_alloc(size_t size) |
| { |
| char *mem; |
| mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
| 0, 0); |
| if (mem == (char *)-1) |
| return NULL; |
| numa_police_memory_int(mem, size); |
| return mem; |
| } |
| |
| void *numa_realloc(void *old_addr, size_t old_size, size_t new_size) |
| { |
| char *mem; |
| mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); |
| if (mem == (char *)-1) |
| return NULL; |
| /* |
| * The memory policy of the allocated pages is preserved by mremap(), so |
| * there is no need to (re)set it here. If the policy of the original |
| * allocation is not set, the new pages will be allocated according to the |
| * process' mempolicy. Trying to allocate explicitly the new pages on the |
| * same node as the original ones would require changing the policy of the |
| * newly allocated pages, which violates the numa_realloc() semantics. |
| */ |
| return mem; |
| } |
| |
| void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask) |
| { |
| char *mem; |
| struct bitmask bitmask; |
| |
| mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
| 0, 0); |
| if (mem == (char *)-1) |
| return NULL; |
| bitmask.maskp = (unsigned long *)mask; |
| bitmask.size = sizeof(nodemask_t) * 8; |
| dombind(mem, size, MPOL_INTERLEAVE, &bitmask); |
| return mem; |
| } |
| backward_symver(numa_alloc_interleaved_subset_v1,numa_alloc_interleaved_subset); |
| |
| void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp) |
| { |
| char *mem; |
| |
| mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
| 0, 0); |
| if (mem == (char *)-1) |
| return NULL; |
| dombind(mem, size, MPOL_INTERLEAVE, bmp); |
| return mem; |
| } |
| symver(numa_alloc_interleaved_subset_v2,numa_alloc_interleaved_subset); |
| |
| make_internal_alias(numa_alloc_interleaved_subset_v1); |
| make_internal_alias(numa_alloc_interleaved_subset_v2); |
| |
| void * |
| numa_alloc_interleaved(size_t size) |
| { |
| return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr); |
| } |
| |
| /* |
| * given a user node mask, set memory policy to use those nodes |
| */ |
| void |
| numa_set_interleave_mask_v1(nodemask_t *mask) |
| { |
| struct bitmask *bmp; |
| int nnodes = numa_max_possible_node_v1_int()+1; |
| |
| bmp = numa_bitmask_alloc(nnodes); |
| copy_nodemask_to_bitmask(mask, bmp); |
| if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) |
| setpol(MPOL_DEFAULT, bmp); |
| else |
| setpol(MPOL_INTERLEAVE, bmp); |
| numa_bitmask_free(bmp); |
| } |
| |
| backward_symver(numa_set_interleave_mask_v1,numa_set_interleave_mask); |
| |
| void |
| numa_set_interleave_mask_v2(struct bitmask *bmp) |
| { |
| if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) |
| setpol(MPOL_DEFAULT, bmp); |
| else |
| setpol(MPOL_INTERLEAVE, bmp); |
| } |
| symver(numa_set_interleave_mask_v2,numa_set_interleave_mask); |
| |
| nodemask_t |
| numa_get_interleave_mask_v1(void) |
| { |
| int oldpolicy = -1; |
| struct bitmask *bmp; |
| nodemask_t mask; |
| |
| bmp = allocate_nodemask_v1(); |
| getpol(&oldpolicy, bmp); |
| if (oldpolicy == MPOL_INTERLEAVE) |
| copy_bitmask_to_nodemask(bmp, &mask); |
| else |
| copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask); |
| numa_bitmask_free(bmp); |
| return mask; |
| } |
| backward_symver(numa_get_interleave_mask_v1,numa_get_interleave_mask); |
| |
| struct bitmask * |
| numa_get_interleave_mask_v2(void) |
| { |
| int oldpolicy = -1; |
| struct bitmask *bmp; |
| |
| bmp = numa_allocate_nodemask(); |
| getpol(&oldpolicy, bmp); |
| if (oldpolicy != MPOL_INTERLEAVE) |
| copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); |
| return bmp; |
| } |
| symver(numa_get_interleave_mask_v2,numa_get_interleave_mask); |
| |
| /* (undocumented) */ |
| int numa_get_interleave_node(void) |
| { |
| int nd; |
| if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0) |
| return nd; |
| return 0; |
| } |
| |
| void *numa_alloc_onnode(size_t size, int node) |
| { |
| char *mem; |
| struct bitmask *bmp; |
| |
| bmp = numa_allocate_nodemask(); |
| numa_bitmask_setbit(bmp, node); |
| mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
| 0, 0); |
| if (mem == (char *)-1) |
| mem = NULL; |
| else |
| dombind(mem, size, bind_policy, bmp); |
| numa_bitmask_free(bmp); |
| return mem; |
| } |
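| |
| /* |
| * Usage sketch (illustrative only): memory from the numa_alloc_*() |
| * family must be released with numa_free(), not free(): |
| * |
| *     size_t len = 1024 * sizeof(double); |
| *     double *v = numa_alloc_onnode(len, 0); |
| *     if (v) { |
| *             ... use v ... |
| *             numa_free(v, len); |
| *     } |
| */ |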
| |
| void *numa_alloc_local(size_t size) |
| { |
| char *mem; |
| mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
| 0, 0); |
| if (mem == (char *)-1) |
| mem = NULL; |
| else |
| dombind(mem, size, MPOL_PREFERRED, NULL); |
| return mem; |
| } |
| |
| void numa_set_bind_policy(int strict) |
| { |
| if (strict) |
| bind_policy = MPOL_BIND; |
| else |
| bind_policy = MPOL_PREFERRED; |
| } |
| |
| void |
| numa_set_membind_v1(const nodemask_t *mask) |
| { |
| struct bitmask bitmask; |
| |
| bitmask.maskp = (unsigned long *)mask; |
| bitmask.size = sizeof(nodemask_t) * 8; |
| setpol(MPOL_BIND, &bitmask); |
| } |
| backward_symver(numa_set_membind_v1,numa_set_membind); |
| |
| void |
| numa_set_membind_v2(struct bitmask *bmp) |
| { |
| setpol(MPOL_BIND, bmp); |
| } |
| symver(numa_set_membind_v2,numa_set_membind); |
| |
| make_internal_alias(numa_set_membind_v2); |
| |
| /* |
| * copy a bitmask map body to a numa.h nodemask_t structure |
| */ |
| void |
| copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp) |
| { |
| int max, i; |
| |
| memset(nmp, 0, sizeof(nodemask_t)); |
| max = (sizeof(nodemask_t)*8); |
| for (i=0; i<bmp->size; i++) { |
| if (i >= max) |
| break; |
| if (numa_bitmask_isbitset(bmp, i)) |
| nodemask_set_compat((nodemask_t *)nmp, i); |
| } |
| } |
| |
| /* |
| * copy a bitmask map body to another bitmask body |
| * fill a larger destination with zeroes |
| */ |
| void |
| copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto) |
| { |
| int bytes; |
| |
| if (bmpfrom->size >= bmpto->size) { |
| memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size)); |
| } else if (bmpfrom->size < bmpto->size) { |
| bytes = CPU_BYTES(bmpfrom->size); |
| memcpy(bmpto->maskp, bmpfrom->maskp, bytes); |
| memset(((char *)bmpto->maskp)+bytes, 0, |
| CPU_BYTES(bmpto->size)-bytes); |
| } |
| } |
| |
| /* |
| * copy a numa.h nodemask_t structure to a bitmask map body |
| */ |
| void |
| copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp) |
| { |
| int max, i; |
| |
| numa_bitmask_clearall(bmp); |
| max = (sizeof(nodemask_t)*8); |
| if (max > bmp->size) |
| max = bmp->size; |
| for (i=0; i<max; i++) { |
| if (nodemask_isset_compat(nmp, i)) |
| numa_bitmask_setbit(bmp, i); |
| } |
| } |
| |
| nodemask_t |
| numa_get_membind_v1(void) |
| { |
| int oldpolicy = -1; |
| struct bitmask *bmp; |
| nodemask_t nmp; |
| |
| bmp = allocate_nodemask_v1(); |
| getpol(&oldpolicy, bmp); |
| if (oldpolicy == MPOL_BIND) { |
| copy_bitmask_to_nodemask(bmp, &nmp); |
| } else { |
| /* copy the body of the map to numa_all_nodes */ |
| copy_bitmask_to_nodemask(bmp, &numa_all_nodes); |
| nmp = numa_all_nodes; |
| } |
| numa_bitmask_free(bmp); |
| return nmp; |
| } |
| backward_symver(numa_get_membind_v1,numa_get_membind); |
| |
| struct bitmask * |
| numa_get_membind_v2(void) |
| { |
| int oldpolicy = -1; |
| struct bitmask *bmp; |
| |
| bmp = numa_allocate_nodemask(); |
| getpol(&oldpolicy, bmp); |
| if (oldpolicy != MPOL_BIND) |
| copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp); |
| return bmp; |
| } |
| symver(numa_get_membind_v2,numa_get_membind); |
| |
| //TODO: do we need a v1 nodemask_t version? |
| struct bitmask *numa_get_mems_allowed(void) |
| { |
| struct bitmask *bmp; |
| |
| /* |
| * can change, so query on each call. |
| */ |
| bmp = numa_allocate_nodemask(); |
| if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0, |
| MPOL_F_MEMS_ALLOWED) < 0) |
| numa_error("get_mempolicy"); |
| return bmp; |
| } |
| make_internal_alias(numa_get_mems_allowed); |
| |
| void numa_free(void *mem, size_t size) |
| { |
| munmap(mem, size); |
| } |
| |
| int |
| numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus) |
| { |
| int i; |
| char *p = strchr(line, '\n'); |
| if (!p) |
| return -1; |
| |
| for (i = 0; p > line; i++) { |
| char *oldp, *endp; |
| oldp = p; |
| if (*p == ',') |
| --p; |
| while (p > line && *p != ',') |
| --p; |
| /* Eat two 32bit fields at a time to get longs */ |
| if (p > line && sizeof(unsigned long) == 8) { |
| oldp--; |
| memmove(p, p+1, oldp-p+1); |
| while (p > line && *p != ',') |
| --p; |
| } |
| if (*p == ',') |
| p++; |
| if (i >= CPU_LONGS(ncpus)) |
| return -1; |
| mask[i] = strtoul(p, &endp, 16); |
| if (endp != oldp) |
| return -1; |
| p--; |
| } |
| return 0; |
| } |
| backward_symver(numa_parse_bitmap_v1,numa_parse_bitmap); |
| |
| int |
| numa_parse_bitmap_v2(char *line, struct bitmask *mask) |
| { |
| int i, ncpus; |
| char *p = strchr(line, '\n'); |
| if (!p) |
| return -1; |
| ncpus = mask->size; |
| |
| for (i = 0; p > line; i++) { |
| char *oldp, *endp; |
| oldp = p; |
| if (*p == ',') |
| --p; |
| while (p > line && *p != ',') |
| --p; |
| /* Eat two 32bit fields at a time to get longs */ |
| if (p > line && sizeof(unsigned long) == 8) { |
| oldp--; |
| memmove(p, p+1, oldp-p+1); |
| while (p > line && *p != ',') |
| --p; |
| } |
| if (*p == ',') |
| p++; |
| if (i >= CPU_LONGS(ncpus)) |
| return -1; |
| mask->maskp[i] = strtoul(p, &endp, 16); |
| if (endp != oldp) |
| return -1; |
| p--; |
| } |
| return 0; |
| } |
| symver(numa_parse_bitmap_v2,numa_parse_bitmap); |
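| |
| /* |
| * Example: a sysfs cpumap line such as "00000003,00000000\n" lists |
| * 32 bit words with the highest word first; on a 64 bit system the |
| * parsers above merge each pair of words into one unsigned long, |
| * here maskp[0] = 0x0000000300000000 (cpus 32 and 33). |
| */ |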
| |
| static void |
| init_node_cpu_mask_v2(void) |
| { |
| int nnodes = numa_max_possible_node_v2_int() + 1; |
| node_cpu_mask_v2 = calloc(nnodes, sizeof(struct bitmask *)); |
| } |
| |
| /* This would be better with some locking, but I don't want to make libnuma |
| dependent on pthreads right now. The races are relatively harmless. */ |
| int |
| numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen) |
| { |
| int err = 0; |
| char fn[64]; |
| FILE *f; |
| char *line = NULL; |
| size_t len = 0; |
| struct bitmask bitmask; |
| int buflen_needed; |
| unsigned long *mask; |
| int ncpus = numa_num_possible_cpus(); |
| int maxnode = numa_max_node_int(); |
| |
| buflen_needed = CPU_BYTES(ncpus); |
| if ((unsigned)node > maxnode || bufferlen < buflen_needed) { |
| errno = ERANGE; |
| return -1; |
| } |
| if (bufferlen > buflen_needed) |
| memset(buffer, 0, bufferlen); |
| if (node_cpu_mask_v1[node]) { |
| memcpy(buffer, node_cpu_mask_v1[node], buflen_needed); |
| return 0; |
| } |
| |
| mask = malloc(buflen_needed); |
| if (!mask) |
| mask = (unsigned long *)buffer; |
| memset(mask, 0, buflen_needed); |
| |
| sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); |
| f = fopen(fn, "r"); |
| if (!f || getdelim(&line, &len, '\n', f) < 1) { |
| if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { |
| numa_warn(W_nosysfs2, |
| "/sys not mounted or invalid. Assuming one node: %s", |
| strerror(errno)); |
| numa_warn(W_nosysfs2, |
| "(cannot open or correctly parse %s)", fn); |
| } |
| bitmask.maskp = (unsigned long *)mask; |
| bitmask.size = buflen_needed * 8; |
| numa_bitmask_setall(&bitmask); |
| err = -1; |
| } |
| if (f) |
| fclose(f); |
| |
| if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) { |
| numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); |
| bitmask.maskp = (unsigned long *)mask; |
| bitmask.size = buflen_needed * 8; |
| numa_bitmask_setall(&bitmask); |
| err = -1; |
| } |
| |
| free(line); |
| memcpy(buffer, mask, buflen_needed); |
| |
| /* slightly racy, see above */ |
| if (node_cpu_mask_v1[node]) { |
| if (mask != buffer) |
| free(mask); |
| } else { |
| node_cpu_mask_v1[node] = mask; |
| } |
| return err; |
| } |
| backward_symver(numa_node_to_cpus_v1,numa_node_to_cpus); |
| |
| /* |
| * Deliver a bitmask of cpus representing the cpus on a given node |
| * (this can also be used to test whether a node has cpus). |
| * |
| * This would be better with some locking, but I don't want to make libnuma |
| * dependent on pthreads right now. The races are relatively harmless. |
| */ |
| int |
| numa_node_to_cpus_v2(int node, struct bitmask *buffer) |
| { |
| int err = 0; |
| int nnodes = numa_max_node(); |
| char fn[64], *line = NULL; |
| FILE *f; |
| size_t len = 0; |
| struct bitmask *mask; |
| |
| if (!node_cpu_mask_v2) |
| init_node_cpu_mask_v2(); |
| |
| if (node < 0 || node > nnodes) { |
| errno = ERANGE; |
| return -1; |
| } |
| numa_bitmask_clearall(buffer); |
| |
| if (node_cpu_mask_v2[node]) { |
| /* have already constructed a mask for this node */ |
| if (buffer->size < node_cpu_mask_v2[node]->size) { |
| errno = EINVAL; |
| numa_error("map size mismatch"); |
| return -1; |
| } |
| copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer); |
| return 0; |
| } |
| |
| /* need a new mask for this node */ |
| mask = numa_allocate_cpumask(); |
| |
| /* this is a kernel cpumask_t (see node_read_cpumap()) */ |
| sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); |
| f = fopen(fn, "r"); |
| if (!f || getdelim(&line, &len, '\n', f) < 1) { |
| if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { |
| numa_warn(W_nosysfs2, |
| "/sys not mounted or invalid. Assuming one node: %s", |
| strerror(errno)); |
| numa_warn(W_nosysfs2, |
| "(cannot open or correctly parse %s)", fn); |
| } |
| numa_bitmask_setall(mask); |
| err = -1; |
| } |
| if (f) |
| fclose(f); |
| |
| if (line && (numa_parse_bitmap_v2(line, mask) < 0)) { |
| numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); |
| numa_bitmask_setall(mask); |
| err = -1; |
| } |
| |
| free(line); |
| copy_bitmask_to_bitmask(mask, buffer); |
| |
| /* slightly racy, see above */ |
| /* save the mask we created */ |
| if (node_cpu_mask_v2[node]) { |
| /* how could this be? */ |
| if (mask != buffer) |
| numa_bitmask_free(mask); |
| } else { |
| /* we don't want to cache faulty result */ |
| if (!err) |
| node_cpu_mask_v2[node] = mask; |
| else |
| numa_bitmask_free(mask); |
| } |
| return err; |
| } |
| symver(numa_node_to_cpus_v2,numa_node_to_cpus); |
| |
| make_internal_alias(numa_node_to_cpus_v1); |
| make_internal_alias(numa_node_to_cpus_v2); |
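| |
| /* |
| * Usage sketch (illustrative only): list the cpus of node 0: |
| * |
| *     struct bitmask *cpus = numa_allocate_cpumask(); |
| *     unsigned int cpu; |
| *     if (numa_node_to_cpus(0, cpus) == 0) |
| *             for (cpu = 0; cpu < cpus->size; cpu++) |
| *                     if (numa_bitmask_isbitset(cpus, cpu)) |
| *                             printf("cpu %u\n", cpu); |
| *     numa_bitmask_free(cpus); |
| */ |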
| |
| /* report the node of the specified cpu */ |
| int numa_node_of_cpu(int cpu) |
| { |
| struct bitmask *bmp; |
| int ncpus, nnodes, node, ret; |
| |
| ncpus = numa_num_possible_cpus(); |
| if (cpu < 0 || cpu >= ncpus){ |
| errno = EINVAL; |
| return -1; |
| } |
| bmp = numa_bitmask_alloc(ncpus); |
| nnodes = numa_max_node(); |
| for (node = 0; node <= nnodes; node++){ |
| if (numa_node_to_cpus_v2_int(node, bmp) < 0) { |
| /* It's possible for the node to not exist */ |
| continue; |
| } |
| if (numa_bitmask_isbitset(bmp, cpu)){ |
| ret = node; |
| goto end; |
| } |
| } |
| ret = -1; |
| errno = EINVAL; |
| end: |
| numa_bitmask_free(bmp); |
| return ret; |
| } |
| |
| int |
| numa_run_on_node_mask_v1(const nodemask_t *mask) |
| { |
| int ncpus = numa_num_possible_cpus(); |
| int i, k, err; |
| unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)]; |
| memset(cpus, 0, CPU_BYTES(ncpus)); |
| for (i = 0; i < NUMA_NUM_NODES; i++) { |
| if (mask->n[i / BITS_PER_LONG] == 0) |
| continue; |
| if (nodemask_isset_compat(mask, i)) { |
| if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) { |
| numa_warn(W_noderunmask, |
| "Cannot read node cpumask from sysfs"); |
| continue; |
| } |
| for (k = 0; k < CPU_LONGS(ncpus); k++) |
| cpus[k] |= nodecpus[k]; |
| } |
| } |
| err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus); |
| |
| /* The sched_setaffinity API is broken because it expects |
| the user to guess the kernel cpuset size. Do this in a |
| brute force way. */ |
| if (err < 0 && errno == EINVAL) { |
| int savederrno = errno; |
| char *bigbuf; |
| static int size = -1; |
| if (size == -1) |
| size = CPU_BYTES(ncpus) * 2; |
| bigbuf = malloc(CPU_BUFFER_SIZE); |
| if (!bigbuf) { |
| errno = ENOMEM; |
| return -1; |
| } |
| errno = savederrno; |
| while (size <= CPU_BUFFER_SIZE) { |
| memcpy(bigbuf, cpus, CPU_BYTES(ncpus)); |
| memset(bigbuf + CPU_BYTES(ncpus), 0, |
| CPU_BUFFER_SIZE - CPU_BYTES(ncpus)); |
| err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf); |
| if (err == 0 || errno != EINVAL) |
| break; |
| size *= 2; |
| } |
| savederrno = errno; |
| free(bigbuf); |
| errno = savederrno; |
| } |
| return err; |
| } |
| backward_symver(numa_run_on_node_mask_v1,numa_run_on_node_mask); |
| |
| /* |
| * Given a node mask (size of a kernel nodemask_t) (probably populated by |
| * a user argument list) set up a map of cpus (map "cpus") on those nodes. |
| * Then set affinity to those cpus. |
| */ |
| int |
| numa_run_on_node_mask_v2(struct bitmask *bmp) |
| { |
| int ncpus, i, k, err; |
| struct bitmask *cpus, *nodecpus; |
| |
| cpus = numa_allocate_cpumask(); |
| ncpus = cpus->size; |
| nodecpus = numa_allocate_cpumask(); |
| |
| for (i = 0; i < bmp->size; i++) { |
| if (bmp->maskp[i / BITS_PER_LONG] == 0) |
| continue; |
| if (numa_bitmask_isbitset(bmp, i)) { |
| /* |
| * numa_all_nodes_ptr is cpuset aware; use only |
| * these nodes |
| */ |
| if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { |
| numa_warn(W_noderunmask, |
| "node %d not allowed", i); |
| continue; |
| } |
| if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
| numa_warn(W_noderunmask, |
| "Cannot read node cpumask from sysfs"); |
| continue; |
| } |
| for (k = 0; k < CPU_LONGS(ncpus); k++) |
| cpus->maskp[k] |= nodecpus->maskp[k]; |
| } |
| } |
| err = numa_sched_setaffinity_v2_int(0, cpus); |
| |
| numa_bitmask_free(cpus); |
| numa_bitmask_free(nodecpus); |
| |
| /* used to have to consider that this could fail - it shouldn't now */ |
| if (err < 0) { |
| numa_error("numa_sched_setaffinity_v2_int() failed; abort\n"); |
| } |
| |
| return err; |
| } |
| symver(numa_run_on_node_mask_v2,numa_run_on_node_mask); |
| |
| make_internal_alias(numa_run_on_node_mask_v2); |
| |
| /* |
| * Given a node mask (size of a kernel nodemask_t) (probably populated by |
| * a user argument list) set up a map of cpus (map "cpus") on those nodes |
| * without any cpuset awareness. Then set affinity to those cpus. |
| */ |
| int |
| numa_run_on_node_mask_all(struct bitmask *bmp) |
| { |
| int ncpus, i, k, err; |
| struct bitmask *cpus, *nodecpus; |
| |
| cpus = numa_allocate_cpumask(); |
| ncpus = cpus->size; |
| nodecpus = numa_allocate_cpumask(); |
| |
| for (i = 0; i < bmp->size; i++) { |
| if (bmp->maskp[i / BITS_PER_LONG] == 0) |
| continue; |
| if (numa_bitmask_isbitset(bmp, i)) { |
| if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) { |
| numa_warn(W_noderunmask, |
| "node %d not allowed", i); |
| continue; |
| } |
| if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
| numa_warn(W_noderunmask, |
| "Cannot read node cpumask from sysfs"); |
| continue; |
| } |
| for (k = 0; k < CPU_LONGS(ncpus); k++) |
| cpus->maskp[k] |= nodecpus->maskp[k]; |
| } |
| } |
| err = numa_sched_setaffinity_v2_int(0, cpus); |
| |
| numa_bitmask_free(cpus); |
| numa_bitmask_free(nodecpus); |
| |
| /* since all possible nodes are allowed here, this can easily fail */ |
| if (err < 0) { |
| numa_error("numa_sched_setaffinity_v2_int() failed"); |
| } |
| |
| return err; |
| } |
| |
| nodemask_t |
| numa_get_run_node_mask_v1(void) |
| { |
| int ncpus = numa_num_configured_cpus(); |
| int i, k; |
| int max = numa_max_node_int(); |
| struct bitmask *bmp, *cpus, *nodecpus; |
| nodemask_t nmp; |
| |
| cpus = numa_allocate_cpumask(); |
| if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ |
| nmp = numa_no_nodes; |
| goto free_cpus; |
| } |
| |
| nodecpus = numa_allocate_cpumask(); |
| bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */ |
| for (i = 0; i <= max; i++) { |
| if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
| /* It's possible for the node to not exist */ |
| continue; |
| } |
| for (k = 0; k < CPU_LONGS(ncpus); k++) { |
| if (nodecpus->maskp[k] & cpus->maskp[k]) |
| numa_bitmask_setbit(bmp, i); |
| } |
| } |
| copy_bitmask_to_nodemask(bmp, &nmp); |
| numa_bitmask_free(bmp); |
| numa_bitmask_free(nodecpus); |
| free_cpus: |
| numa_bitmask_free(cpus); |
| return nmp; |
| } |
| backward_symver(numa_get_run_node_mask_v1,numa_get_run_node_mask); |
| |
| struct bitmask * |
| numa_get_run_node_mask_v2(void) |
| { |
| int i, k; |
| int ncpus = numa_num_configured_cpus(); |
| int max = numa_max_node_int(); |
| struct bitmask *bmp, *cpus, *nodecpus; |
| |
| bmp = numa_allocate_nodemask(); /* this is a node mask */ |
| cpus = numa_allocate_cpumask(); |
| if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ |
| copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); |
| goto free_cpus; |
| } |
| |
| nodecpus = numa_allocate_cpumask(); |
| for (i = 0; i <= max; i++) { |
| /* |
| * numa_all_nodes_ptr is cpuset aware; show only |
| * these nodes |
| */ |
| if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { |
| continue; |
| } |
| if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
| /* It's possible for the node to not exist */ |
| continue; |
| } |
| for (k = 0; k < CPU_LONGS(ncpus); k++) { |
| if (nodecpus->maskp[k] & cpus->maskp[k]) |
| numa_bitmask_setbit(bmp, i); |
| } |
| } |
| numa_bitmask_free(nodecpus); |
| free_cpus: |
| numa_bitmask_free(cpus); |
| return bmp; |
| } |
| symver(numa_get_run_node_mask_v2,numa_get_run_node_mask); |
| |
| int |
| numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes) |
| { |
| int numa_num_nodes = numa_num_possible_nodes(); |
| |
| return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp, |
| tonodes->maskp); |
| } |
| |
| int numa_move_pages(int pid, unsigned long count, |
| void **pages, const int *nodes, int *status, int flags) |
| { |
| return move_pages(pid, count, pages, nodes, status, flags); |
| } |
| |
| int numa_run_on_node(int node) |
| { |
| int numa_num_nodes = numa_num_possible_nodes(); |
| int ret = -1; |
| struct bitmask *cpus; |
| |
| if (node < -1 || node >= numa_num_nodes){ |
| errno = EINVAL; |
| goto out; |
| } |
| |
| cpus = numa_allocate_cpumask(); |
| |
| if (node == -1) |
| numa_bitmask_setall(cpus); |
| else if (numa_node_to_cpus_v2_int(node, cpus) < 0){ |
| numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs"); |
| goto free; |
| } |
| |
| ret = numa_sched_setaffinity_v2_int(0, cpus); |
| free: |
| numa_bitmask_free(cpus); |
| out: |
| return ret; |
| } |
| |
| int numa_preferred(void) |
| { |
| int policy; |
| int ret; |
| struct bitmask *bmp; |
| |
| bmp = numa_allocate_nodemask(); |
| getpol(&policy, bmp); |
| if (policy == MPOL_PREFERRED || policy == MPOL_BIND) { |
| int i; |
| int max = numa_num_possible_nodes(); |
| for (i = 0; i < max ; i++) |
| if (numa_bitmask_isbitset(bmp, i)){ |
| ret = i; |
| goto end; |
| } |
| } |
| /* could read the current CPU from /proc/self/status. Probably |
| not worth it. */ |
| ret = 0; /* or random one? */ |
| end: |
| numa_bitmask_free(bmp); |
| return ret; |
| } |
| |
| void numa_set_preferred(int node) |
| { |
| struct bitmask *bmp; |
| |
| bmp = numa_allocate_nodemask(); |
| if (node >= 0) { |
| numa_bitmask_setbit(bmp, node); |
| setpol(MPOL_PREFERRED, bmp); |
| } else |
| setpol(MPOL_DEFAULT, bmp); |
| numa_bitmask_free(bmp); |
| } |
| |
| void numa_set_localalloc(void) |
| { |
| setpol(MPOL_DEFAULT, numa_no_nodes_ptr); |
| } |
| |
| void numa_bind_v1(const nodemask_t *nodemask) |
| { |
| struct bitmask bitmask; |
| |
| bitmask.maskp = (unsigned long *)nodemask; |
| bitmask.size = sizeof(nodemask_t) * 8; |
| numa_run_on_node_mask_v2_int(&bitmask); |
| numa_set_membind_v2_int(&bitmask); |
| } |
| backward_symver(numa_bind_v1,numa_bind); |
| |
| void numa_bind_v2(struct bitmask *bmp) |
| { |
| numa_run_on_node_mask_v2_int(bmp); |
| numa_set_membind_v2_int(bmp); |
| } |
| symver(numa_bind_v2,numa_bind); |
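| |
| /* |
| * Usage sketch (illustrative only): restrict both execution and memory |
| * allocation of the current task to nodes 0 and 1: |
| * |
| *     struct bitmask *bmp = numa_parse_nodestring("0,1"); |
| *     if (bmp) { |
| *             numa_bind(bmp); |
| *             numa_bitmask_free(bmp); |
| *     } |
| */ |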
| |
| void numa_set_strict(int flag) |
| { |
| if (flag) |
| mbind_flags |= MPOL_MF_STRICT; |
| else |
| mbind_flags &= ~MPOL_MF_STRICT; |
| } |
| |
| /* |
| * Extract a node or processor number from the given string. |
| * Allow a relative node / processor specification within the allowed |
| * set if "relative" is nonzero |
| */ |
| static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative) |
| { |
| long i, nr; |
| |
| if (!relative) |
| return strtoul(s, end, 0); |
| |
| nr = strtoul(s, end, 0); |
| if (s == *end) |
| return nr; |
| /* Find the nth set bit */ |
| for (i = 0; nr >= 0 && i <= bmp->size; i++) |
| if (numa_bitmask_isbitset(bmp, i)) |
| nr--; |
| return i-1; |
| } |
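| |
| /* |
| * Example: with allowed nodes {0,2,4}, the relative specification "+1" |
| * resolves to the second set bit (0-based index 1), i.e. node 2. |
| */ |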
| |
| /* |
| * __numa_parse_nodestring() is called to create a node mask, given |
| * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10. |
| * (the + indicates that the numbers are nodeset-relative) |
| * |
| * The nodes may be specified as absolute, or relative to the current nodeset. |
| * The list of available nodes is in a map pointed to by "allowed_nodes_ptr", |
| * which may represent all nodes or the nodes in the current nodeset. |
| * |
| * The caller must free the returned bitmask. |
| */ |
| static struct bitmask * |
| __numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr) |
| { |
| int invert = 0, relative = 0; |
| int conf_nodes = numa_num_configured_nodes(); |
| char *end; |
| struct bitmask *mask; |
| |
| mask = numa_allocate_nodemask(); |
| |
| if (s[0] == 0){ |
| copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask); |
| return mask; /* return freeable mask */ |
| } |
| if (*s == '!') { |
| invert = 1; |
| s++; |
| } |
| if (*s == '+') { |
| relative++; |
| s++; |
| } |
| do { |
| unsigned long arg; |
| int i; |
| if (isalpha(*s)) { |
| int n; |
| if (!strcmp(s,"all")) { |
| copy_bitmask_to_bitmask(allowed_nodes_ptr, |
| mask); |
| s+=4; |
| break; |
| } |
| n = resolve_affinity(s, mask); |
| if (n != NO_IO_AFFINITY) { |
| if (n < 0) |
| goto err; |
| s += strlen(s) + 1; |
| break; |
| } |
| } |
| arg = get_nr(s, &end, allowed_nodes_ptr, relative); |
| if (end == s) { |
| numa_warn(W_nodeparse, "unparseable node description `%s'\n", s); |
| goto err; |
| } |
| if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) { |
| numa_warn(W_nodeparse, "node argument %d is out of range\n", arg); |
| goto err; |
| } |
| i = arg; |
| numa_bitmask_setbit(mask, i); |
| s = end; |
| if (*s == '-') { |
| char *end2; |
| unsigned long arg2; |
| arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative); |
| if (end2 == s) { |
| numa_warn(W_nodeparse, "missing node argument %s\n", s); |
| goto err; |
| } |
| if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) { |
| numa_warn(W_nodeparse, "node argument %d out of range\n", arg2); |
| goto err; |
| } |
| while (arg <= arg2) { |
| i = arg; |
| if (numa_bitmask_isbitset(allowed_nodes_ptr,i)) |
| numa_bitmask_setbit(mask, i); |
| arg++; |
| } |
| s = end2; |
| } |
| } while (*s++ == ','); |
| if (s[-1] != '\0') |
| goto err; |
| if (invert) { |
| int i; |
| for (i = 0; i < conf_nodes; i++) { |
| if (numa_bitmask_isbitset(mask, i)) |
| numa_bitmask_clearbit(mask, i); |
| else |
| numa_bitmask_setbit(mask, i); |
| } |
| } |
| return mask; |
| |
| err: |
| numa_bitmask_free(mask); |
| return NULL; |
| } |
| |
| /* |
| * numa_parse_nodestring() is called to create a bitmask from nodes available |
| * for this task. |
| */ |
| |
| struct bitmask * numa_parse_nodestring(const char *s) |
| { |
| return __numa_parse_nodestring(s, numa_all_nodes_ptr); |
| } |
| |
| /* |
| * numa_parse_nodestring_all() is called to create a bitmask from all nodes |
| * available. |
| */ |
| |
| struct bitmask * numa_parse_nodestring_all(const char *s) |
| { |
| return __numa_parse_nodestring(s, numa_possible_nodes_ptr); |
| } |
| |
| /* |
| * __numa_parse_cpustring() is called to create a bitmask, given |
| * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10. |
| * (the + indicates that the numbers are cpuset-relative) |
| * |
| * The cpus may be specified as absolute, or relative to the current cpuset. |
| * The list of available cpus for this task is in the map pointed to by |
| * "allowed_cpus_ptr", which may represent all cpus or the cpus in the |
| * current cpuset. |
| * |
| * The caller must free the returned bitmask. |
| */ |
| static struct bitmask * |
| __numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr) |
| { |
| int invert = 0, relative=0; |
| int conf_cpus = numa_num_configured_cpus(); |
| char *end; |
| struct bitmask *mask; |
| |
| mask = numa_allocate_cpumask(); |
| |
| if (s[0] == 0) |
| return mask; |
| if (*s == '!') { |
| invert = 1; |
| s++; |
| } |
| if (*s == '+') { |
| relative++; |
| s++; |
| } |
| do { |
| unsigned long arg; |
| int i; |
| |
| if (!strcmp(s,"all")) { |
| copy_bitmask_to_bitmask(allowed_cpus_ptr, mask); |
| s+=4; |
| break; |
| } |
| arg = get_nr(s, &end, allowed_cpus_ptr, relative); |
| if (end == s) { |
| numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s); |
| goto err; |
| } |
| if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) { |
| numa_warn(W_cpuparse, "cpu argument %s is out of range\n", s); |
| goto err; |
| } |
| i = arg; |
| numa_bitmask_setbit(mask, i); |
| s = end; |
| if (*s == '-') { |
| char *end2; |
| unsigned long arg2; |
| int i; |
| arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative); |
| if (end2 == s) { |
| numa_warn(W_cpuparse, "missing cpu argument %s\n", s); |
| goto err; |
| } |
| if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) { |
| numa_warn(W_cpuparse, "cpu argument %s out of range\n", s); |
| goto err; |
| } |
| while (arg <= arg2) { |
| i = arg; |
| if (numa_bitmask_isbitset(allowed_cpus_ptr, i)) |
| numa_bitmask_setbit(mask, i); |
| arg++; |
| } |
| s = end2; |
| } |
| } while (*s++ == ','); |
| if (s[-1] != '\0') |
| goto err; |
| if (invert) { |
| int i; |
| for (i = 0; i < conf_cpus; i++) { |
| if (numa_bitmask_isbitset(mask, i)) |
| numa_bitmask_clearbit(mask, i); |
| else |
| numa_bitmask_setbit(mask, i); |
| } |
| } |
| return mask; |
| |
| err: |
| numa_bitmask_free(mask); |
| return NULL; |
| } |
| |
| /* |
| * numa_parse_cpustring() is called to create a bitmask from cpus available |
| * for this task. |
| */ |
| |
| struct bitmask * numa_parse_cpustring(const char *s) |
| { |
| return __numa_parse_cpustring(s, numa_all_cpus_ptr); |
| } |
| |
| /* |
| * numa_parse_cpustring_all() is called to create a bitmask from all cpus |
| * available. |
| */ |
| |
| struct bitmask * numa_parse_cpustring_all(const char *s) |
| { |
| return __numa_parse_cpustring(s, numa_possible_cpus_ptr); |
| } |