/* Simple NUMA library.
Copyright (C) 2003,2004,2005,2008 Andi Kleen,SuSE Labs and
Cliff Wickman,SGI.
libnuma is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; version
2.1.
libnuma is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should find a copy of v2.1 of the GNU Lesser General Public License
somewhere on your Linux system; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
All calls are undefined when numa_available returns an error. */
#define _GNU_SOURCE 1
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sched.h>
#include <dirent.h>
#include <errno.h>
#include <stdarg.h>
#include <ctype.h>
#include <sys/mman.h>
#include <limits.h>
#ifdef MEMORY_SANITIZER
#include <sanitizer/msan_interface.h>
#endif
#include "config.h"
#include "numa.h"
#include "numaif.h"
#include "numaint.h"
#include "util.h"
#include "affinity.h"
#define WEAK __attribute__((weak))
#define CPU_BUFFER_SIZE 4096 /* This limits you to 32768 CPUs */
/* these are the old (version 1) masks */
nodemask_t numa_no_nodes;
nodemask_t numa_all_nodes;
/* these are now the default bitmask (pointers to) (version 2) */
struct bitmask *numa_no_nodes_ptr = NULL;
struct bitmask *numa_all_nodes_ptr = NULL;
struct bitmask *numa_possible_nodes_ptr = NULL;
struct bitmask *numa_all_cpus_ptr = NULL;
struct bitmask *numa_possible_cpus_ptr = NULL;
/* I would prefer to use symbol versioning to create v1 and v2 versions
of numa_no_nodes and numa_all_nodes, but the loader does not correctly
handle versioning of BSS versus small data items */
struct bitmask *numa_nodes_ptr = NULL;
static struct bitmask *numa_memnode_ptr = NULL;
static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
static struct bitmask **node_cpu_mask_v2;
WEAK void numa_error(const char *where);
#ifndef TLS
#warning "not threadsafe"
#define __thread
#endif
static __thread int bind_policy = MPOL_BIND;
static __thread unsigned int mbind_flags = 0;
static int sizes_set=0;
static int maxconfigurednode = -1;
static int maxconfiguredcpu = -1;
static int numprocnode = -1;
static int numproccpu = -1;
static int nodemask_sz = 0;
static int cpumask_sz = 0;
int numa_exit_on_error = 0;
int numa_exit_on_warn = 0;
static void set_sizes(void);
/*
 * The constructor and destructor below are called automatically by the
 * dynamic loader when the library is loaded and unloaded (the traditional
 * _init()/_fini() mechanism expressed as GCC attributes).
 *
 * The v1 library depends upon nodemask_t's of all nodes and no nodes.
 */
void __attribute__((constructor))
numa_init(void)
{
int max,i;
if (sizes_set)
return;
set_sizes();
/* numa_all_nodes should represent existing nodes on this system */
max = numa_num_configured_nodes();
for (i = 0; i < max; i++)
nodemask_set_compat((nodemask_t *)&numa_all_nodes, i);
memset(&numa_no_nodes, 0, sizeof(numa_no_nodes));
}
#define FREE_AND_ZERO(x) if (x) { \
numa_bitmask_free(x); \
x = NULL; \
}
void __attribute__((destructor))
numa_fini(void)
{
FREE_AND_ZERO(numa_all_cpus_ptr);
FREE_AND_ZERO(numa_possible_cpus_ptr);
FREE_AND_ZERO(numa_all_nodes_ptr);
FREE_AND_ZERO(numa_possible_nodes_ptr);
FREE_AND_ZERO(numa_no_nodes_ptr);
FREE_AND_ZERO(numa_memnode_ptr);
FREE_AND_ZERO(numa_nodes_ptr);
}
/*
* The following bitmask declarations, bitmask_*() routines, and associated
* _setbit() and _getbit() routines are:
 * Copyright (c) 2004-2007 Silicon Graphics, Inc. (SGI) All rights reserved.
* SGI publishes it under the terms of the GNU General Public License, v2,
* as published by the Free Software Foundation.
*/
static unsigned int
_getbit(const struct bitmask *bmp, unsigned int n)
{
if (n < bmp->size)
return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1;
else
return 0;
}
static void
_setbit(struct bitmask *bmp, unsigned int n, unsigned int v)
{
if (n < bmp->size) {
if (v)
bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong);
else
bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong));
}
}
int
numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i)
{
return _getbit(bmp, i);
}
struct bitmask *
numa_bitmask_setall(struct bitmask *bmp)
{
unsigned int i;
for (i = 0; i < bmp->size; i++)
_setbit(bmp, i, 1);
return bmp;
}
struct bitmask *
numa_bitmask_clearall(struct bitmask *bmp)
{
unsigned int i;
for (i = 0; i < bmp->size; i++)
_setbit(bmp, i, 0);
return bmp;
}
struct bitmask *
numa_bitmask_setbit(struct bitmask *bmp, unsigned int i)
{
_setbit(bmp, i, 1);
return bmp;
}
struct bitmask *
numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i)
{
_setbit(bmp, i, 0);
return bmp;
}
unsigned int
numa_bitmask_nbytes(struct bitmask *bmp)
{
return longsperbits(bmp->size) * sizeof(unsigned long);
}
/* where n is the number of bits in the map */
/* This function should not exit on failure, but right now we cannot really
recover from this. */
struct bitmask *
numa_bitmask_alloc(unsigned int n)
{
struct bitmask *bmp;
if (n < 1) {
errno = EINVAL;
numa_error("request to allocate mask for invalid number");
exit(1);
}
bmp = malloc(sizeof(*bmp));
if (bmp == 0)
goto oom;
bmp->size = n;
bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long));
if (bmp->maskp == 0) {
free(bmp);
goto oom;
}
return bmp;
oom:
numa_error("Out of memory allocating bitmask");
exit(1);
}
void
numa_bitmask_free(struct bitmask *bmp)
{
if (bmp == 0)
return;
free(bmp->maskp);
bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */
free(bmp);
return;
}
/* True if two bitmasks are equal */
int
numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2)
{
unsigned int i;
for (i = 0; i < bmp1->size || i < bmp2->size; i++)
if (_getbit(bmp1, i) != _getbit(bmp2, i))
return 0;
return 1;
}
/* Hamming Weight: number of set bits */
unsigned int numa_bitmask_weight(const struct bitmask *bmp)
{
unsigned int i;
unsigned int w = 0;
for (i = 0; i < bmp->size; i++)
if (_getbit(bmp, i))
w++;
return w;
}
/* *****end of bitmask_ routines ************ */
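/*
 * Illustrative use of the bitmask routines above (a sketch, not part of the
 * library): allocate a mask sized for all possible nodes, set two bits,
 * count them, and release the mask again.
 *
 *	struct bitmask *bmp = numa_bitmask_alloc(numa_num_possible_nodes());
 *	numa_bitmask_setbit(bmp, 0);
 *	numa_bitmask_setbit(bmp, 1);
 *	assert(numa_bitmask_weight(bmp) == 2);
 *	numa_bitmask_free(bmp);
 */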
/* Next two can be overwritten by the application for different error handling */
WEAK void numa_error(const char *where)
{
int olde = errno;
perror(where);
if (numa_exit_on_error)
exit(1);
errno = olde;
}
WEAK void numa_warn(int num, const char *fmt, ...)
{
static unsigned warned;
va_list ap;
int olde = errno;
/* Give each warning only once */
if ((1<<num) & warned)
return;
warned |= (1<<num);
va_start(ap,fmt);
fprintf(stderr, "libnuma: Warning: ");
vfprintf(stderr, fmt, ap);
fputc('\n', stderr);
va_end(ap);
errno = olde;
}
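/*
 * numa_error() and numa_warn() are weak symbols, so an application can
 * provide its own definitions to override the default handling. A minimal
 * sketch of an application-side override (not part of libnuma):
 *
 *	void numa_error(const char *where)
 *	{
 *		fprintf(stderr, "libnuma: %s failed: %s\n",
 *			where, strerror(errno));
 *		abort();
 *	}
 */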
static void setpol(int policy, struct bitmask *bmp)
{
if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0)
numa_error("set_mempolicy");
}
static void getpol(int *oldpolicy, struct bitmask *bmp)
{
if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0)
numa_error("get_mempolicy");
}
static void dombind(void *mem, size_t size, int pol, struct bitmask *bmp)
{
if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0,
mbind_flags) < 0)
numa_error("mbind");
}
/* (undocumented) */
/* gives the wrong answer for hugetlbfs mappings. */
int numa_pagesize(void)
{
static int pagesize;
if (pagesize > 0)
return pagesize;
pagesize = getpagesize();
return pagesize;
}
make_internal_alias(numa_pagesize);
/*
* Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr)
* and the highest numbered existing node (maxconfigurednode).
*/
static void
set_configured_nodes(void)
{
DIR *d;
struct dirent *de;
long long freep;
numa_memnode_ptr = numa_allocate_nodemask();
numa_nodes_ptr = numa_allocate_nodemask();
d = opendir("/sys/devices/system/node");
if (!d) {
maxconfigurednode = 0;
} else {
while ((de = readdir(d)) != NULL) {
int nd;
if (strncmp(de->d_name, "node", 4))
continue;
nd = strtoul(de->d_name+4, NULL, 0);
numa_bitmask_setbit(numa_nodes_ptr, nd);
if (numa_node_size64(nd, &freep) > 0)
numa_bitmask_setbit(numa_memnode_ptr, nd);
if (maxconfigurednode < nd)
maxconfigurednode = nd;
}
closedir(d);
}
}
/*
* Convert the string length of an ascii hex mask to the number
* of bits represented by that mask.
*/
static int s2nbits(const char *s)
{
return strlen(s) * 32 / 9;
}
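/*
 * Worked example: a 64-bit mask is printed as two 8-digit hex groups, a
 * separating comma and a trailing newline, e.g. "00000000,00000003\n",
 * which is 18 characters long, and 18 * 32 / 9 = 64 bits.
 */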
/* Is string 'pre' a prefix of string 's'? */
static int strprefix(const char *s, const char *pre)
{
return strncmp(s, pre, strlen(pre)) == 0;
}
static const char *mask_size_file = "/proc/self/status";
static const char *nodemask_prefix = "Mems_allowed:\t";
/*
* (do this the way Paul Jackson's libcpuset does it)
* The nodemask values in /proc/self/status are in an
* ascii format that uses 9 characters for each 32 bits of mask.
* (this could also be used to find the cpumask size)
*/
static void
set_nodemask_size(void)
{
FILE *fp;
char *buf = NULL;
size_t bufsize = 0;
if ((fp = fopen(mask_size_file, "r")) == NULL)
goto done;
while (getline(&buf, &bufsize, fp) > 0) {
#ifdef MEMORY_SANITIZER
__msan_unpoison_string(buf);
#endif
if (strprefix(buf, nodemask_prefix)) {
nodemask_sz = s2nbits(buf + strlen(nodemask_prefix));
break;
}
}
free(buf);
fclose(fp);
done:
if (nodemask_sz == 0) {/* fall back on error */
int pol;
unsigned long *mask = NULL;
nodemask_sz = 16;
do {
nodemask_sz <<= 1;
mask = realloc(mask, nodemask_sz / 8);
if (!mask)
return;
} while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL &&
nodemask_sz < 4096*8);
free(mask);
}
}
/*
* Read a mask consisting of a sequence of hexadecimal longs separated by
* commas. Order them correctly and return the number of bits set.
*/
static int
read_mask(char *s, struct bitmask *bmp)
{
char *end = s;
int tmplen = (bmp->size + bitsperint - 1) / bitsperint;
unsigned int tmp[tmplen];
unsigned int *start = tmp;
unsigned int i, n = 0, m = 0;
if (!s)
return 0; /* shouldn't happen */
i = strtoul(s, &end, 16);
/* Skip leading zeros */
while (!i && *end++ == ',') {
i = strtoul(end, &end, 16);
}
if (!i)
/* End of string. No mask */
return -1;
	start[n++] = i;
	/* Read sequence of ints */
	while (*end++ == ',') {
		/* check first to avoid overflowing the tmp[] buffer */
		if (n >= tmplen)
			return -1;
		i = strtoul(end, &end, 16);
		start[n++] = i;
	}
/*
* Invert sequence of ints if necessary since the first int
* is the highest and we put it first because we read it first.
*/
while (n) {
int w;
unsigned long x = 0;
/* read into long values in an endian-safe way */
for (w = 0; n && w < bitsperlong; w += bitsperint)
x |= ((unsigned long)start[n-- - 1] << w);
bmp->maskp[m++] = x;
}
/*
* Return the number of bits set
*/
return numa_bitmask_weight(bmp);
}
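/*
 * Worked example (64-bit longs): the string "00000001,80000000" is read as
 * two 32-bit groups, most significant first (as the kernel prints them),
 * and assembled into the single long 0x0000000180000000, so bits 31 and 32
 * of the bitmask end up set and, assuming the rest of the mask is clear,
 * the function returns 2.
 */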
/*
 * Read a process's constraints in terms of nodes and cpus from
* /proc/self/status.
*/
static void
set_task_constraints(void)
{
int hicpu = maxconfiguredcpu;
int i;
char *buffer = NULL;
size_t buflen = 0;
FILE *f;
numa_all_cpus_ptr = numa_allocate_cpumask();
numa_possible_cpus_ptr = numa_allocate_cpumask();
numa_all_nodes_ptr = numa_allocate_nodemask();
	numa_possible_nodes_ptr = numa_allocate_nodemask();
numa_no_nodes_ptr = numa_allocate_nodemask();
f = fopen(mask_size_file, "r");
if (!f) {
//numa_warn(W_cpumap, "Cannot parse %s", mask_size_file);
return;
}
while (getline(&buffer, &buflen, f) > 0) {
#ifdef MEMORY_SANITIZER
__msan_unpoison_string(buffer);
#endif
/* mask starts after [last] tab */
char *mask = strrchr(buffer,'\t') + 1;
if (strncmp(buffer,"Cpus_allowed:",13) == 0)
numproccpu = read_mask(mask, numa_all_cpus_ptr);
if (strncmp(buffer,"Mems_allowed:",13) == 0) {
numprocnode = read_mask(mask, numa_all_nodes_ptr);
}
}
fclose(f);
free(buffer);
for (i = 0; i <= hicpu; i++)
numa_bitmask_setbit(numa_possible_cpus_ptr, i);
for (i = 0; i <= maxconfigurednode; i++)
numa_bitmask_setbit(numa_possible_nodes_ptr, i);
/*
	 * Cpus_allowed in the kernel can be set to all f's,
	 * i.e. it may be a superset of the actually available processors.
	 * As such, reduce numproccpu to the number of actually
	 * available cpus.
*/
if (numproccpu <= 0) {
for (i = 0; i <= hicpu; i++)
numa_bitmask_setbit(numa_all_cpus_ptr, i);
numproccpu = hicpu+1;
}
if (numproccpu > hicpu+1) {
numproccpu = hicpu+1;
for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) {
numa_bitmask_clearbit(numa_all_cpus_ptr, i);
}
}
if (numprocnode <= 0) {
for (i = 0; i <= maxconfigurednode; i++)
numa_bitmask_setbit(numa_all_nodes_ptr, i);
numprocnode = maxconfigurednode + 1;
}
return;
}
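/*
 * The /proc/self/status lines parsed above look roughly like this (the
 * number of hex groups depends on the kernel configuration):
 *
 *	Cpus_allowed:	ffffffff
 *	Mems_allowed:	00000000,00000003
 */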
/*
* Find the highest cpu number possible (in other words the size
* of a kernel cpumask_t (in bits) - 1)
*/
static void
set_numa_max_cpu(void)
{
int len = 4096;
int n;
int olde = errno;
struct bitmask *buffer;
do {
buffer = numa_bitmask_alloc(len);
n = numa_sched_getaffinity_v2_int(0, buffer);
/* on success, returns size of kernel cpumask_t, in bytes */
if (n < 0) {
if (errno == EINVAL) {
if (len >= 1024*1024)
break;
len *= 2;
numa_bitmask_free(buffer);
continue;
} else {
numa_warn(W_numcpus, "Unable to determine max cpu"
" (sched_getaffinity: %s); guessing...",
strerror(errno));
n = sizeof(cpu_set_t);
break;
}
}
} while (n < 0);
numa_bitmask_free(buffer);
errno = olde;
cpumask_sz = n*8;
}
/*
* get the total (configured) number of cpus - both online and offline
*/
static void
set_configured_cpus(void)
{
maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1;
if (maxconfiguredcpu == -1)
numa_error("sysconf(NPROCESSORS_CONF) failed");
}
/*
* Initialize all the sizes.
*/
static void
set_sizes(void)
{
sizes_set++;
set_nodemask_size(); /* size of kernel nodemask_t */
set_configured_nodes(); /* configured nodes listed in /sys */
set_numa_max_cpu(); /* size of kernel cpumask_t */
set_configured_cpus(); /* cpus listed in /sys/devices/system/cpu */
set_task_constraints(); /* cpus and nodes for current task */
}
int
numa_num_configured_nodes(void)
{
/*
* NOTE: this function's behavior matches the documentation (ie: it
* returns a count of nodes with memory) despite the poor function
* naming. We also cannot use the similarly poorly named
* numa_all_nodes_ptr as it only tracks nodes with memory from which
* the calling process can allocate. Think sparse nodes, memory-less
* nodes, cpusets...
*/
int memnodecount=0, i;
for (i=0; i <= maxconfigurednode; i++) {
if (numa_bitmask_isbitset(numa_memnode_ptr, i))
memnodecount++;
}
return memnodecount;
}
int
numa_num_configured_cpus(void)
{
return maxconfiguredcpu+1;
}
int
numa_num_possible_nodes(void)
{
return nodemask_sz;
}
int
numa_num_possible_cpus(void)
{
return cpumask_sz;
}
int
numa_num_task_nodes(void)
{
return numprocnode;
}
/*
* for backward compatibility
*/
int
numa_num_thread_nodes(void)
{
return numa_num_task_nodes();
}
int
numa_num_task_cpus(void)
{
return numproccpu;
}
/*
* for backward compatibility
*/
int
numa_num_thread_cpus(void)
{
return numa_num_task_cpus();
}
/*
 * Return the number of the highest node in this running system.
*/
int
numa_max_node(void)
{
return maxconfigurednode;
}
make_internal_alias(numa_max_node);
/*
 * Return the number of the highest possible node in a system:
 * for v1 this is the size of a numa.h nodemask_t (in bits) - 1,
 * but for v2 it is the size of a kernel nodemask_t (in bits) - 1.
*/
int
numa_max_possible_node_v1(void)
{
return ((sizeof(nodemask_t)*8)-1);
}
backward_symver(numa_max_possible_node_v1,numa_max_possible_node);
int
numa_max_possible_node_v2(void)
{
return numa_num_possible_nodes()-1;
}
symver(numa_max_possible_node_v2,numa_max_possible_node);
make_internal_alias(numa_max_possible_node_v1);
make_internal_alias(numa_max_possible_node_v2);
/*
* Allocate a bitmask for cpus, of a size large enough to
* match the kernel's cpumask_t.
*/
struct bitmask *
numa_allocate_cpumask()
{
int ncpus = numa_num_possible_cpus();
return numa_bitmask_alloc(ncpus);
}
/*
* Allocate a bitmask the size of a libnuma nodemask_t
*/
static struct bitmask *
allocate_nodemask_v1(void)
{
int nnodes = numa_max_possible_node_v1_int()+1;
return numa_bitmask_alloc(nnodes);
}
/*
* Allocate a bitmask for nodes, of a size large enough to
* match the kernel's nodemask_t.
*/
struct bitmask *
numa_allocate_nodemask(void)
{
struct bitmask *bmp;
int nnodes = numa_max_possible_node_v2_int() + 1;
bmp = numa_bitmask_alloc(nnodes);
return bmp;
}
/* (cache the result?) */
long long numa_node_size64(int node, long long *freep)
{
size_t len = 0;
char *line = NULL;
long long size = -1;
FILE *f;
char fn[64];
int ok = 0;
int required = freep ? 2 : 1;
if (freep)
*freep = -1;
sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node);
f = fopen(fn, "r");
if (!f)
return -1;
while (getdelim(&line, &len, '\n', f) > 0) {
char *end;
char *s = strcasestr(line, "kB");
if (!s)
continue;
--s;
while (s > line && isspace(*s))
--s;
while (s > line && isdigit(*s))
--s;
if (strstr(line, "MemTotal")) {
size = strtoull(s,&end,0) << 10;
if (end == s)
size = -1;
else
ok++;
}
if (freep && strstr(line, "MemFree")) {
*freep = strtoull(s,&end,0) << 10;
if (end == s)
*freep = -1;
else
ok++;
}
}
fclose(f);
free(line);
if (ok != required)
numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok);
return size;
}
make_internal_alias(numa_node_size64);
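/*
 * The sysfs meminfo file parsed above contains lines of roughly this form:
 *
 *	Node 0 MemTotal:       16314244 kB
 *	Node 0 MemFree:         9841644 kB
 *
 * The parsed value is shifted left by 10 to convert from kB to bytes.
 */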
long numa_node_size(int node, long *freep)
{
long long f2;
long sz = numa_node_size64_int(node, &f2);
if (freep)
*freep = f2;
return sz;
}
int numa_available(void)
{
if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
return -1;
return 0;
}
void
numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask)
{
struct bitmask bitmask;
bitmask.size = sizeof(nodemask_t) * 8;
bitmask.maskp = (unsigned long *)mask;
dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
}
backward_symver(numa_interleave_memory_v1,numa_interleave_memory);
void
numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp)
{
dombind(mem, size, MPOL_INTERLEAVE, bmp);
}
symver(numa_interleave_memory_v2,numa_interleave_memory);
void numa_tonode_memory(void *mem, size_t size, int node)
{
struct bitmask *nodes;
nodes = numa_allocate_nodemask();
numa_bitmask_setbit(nodes, node);
dombind(mem, size, bind_policy, nodes);
numa_bitmask_free(nodes);
}
void
numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask)
{
struct bitmask bitmask;
bitmask.maskp = (unsigned long *)mask;
	bitmask.size = sizeof(nodemask_t) * 8;
dombind(mem, size, bind_policy, &bitmask);
}
backward_symver(numa_tonodemask_memory_v1,numa_tonodemask_memory);
void
numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp)
{
dombind(mem, size, bind_policy, bmp);
}
symver(numa_tonodemask_memory_v2,numa_tonodemask_memory);
void numa_setlocal_memory(void *mem, size_t size)
{
dombind(mem, size, MPOL_PREFERRED, NULL);
}
void numa_police_memory(void *mem, size_t size)
{
int pagesize = numa_pagesize_int();
unsigned long i;
for (i = 0; i < size; i += pagesize)
((volatile char*)mem)[i] = ((volatile char*)mem)[i];
}
make_internal_alias(numa_police_memory);
void *numa_alloc(size_t size)
{
char *mem;
mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
0, 0);
if (mem == (char *)-1)
return NULL;
numa_police_memory_int(mem, size);
return mem;
}
void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
{
char *mem;
mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
if (mem == (char *)-1)
return NULL;
/*
* The memory policy of the allocated pages is preserved by mremap(), so
* there is no need to (re)set it here. If the policy of the original
* allocation is not set, the new pages will be allocated according to the
* process' mempolicy. Trying to allocate explicitly the new pages on the
* same node as the original ones would require changing the policy of the
* newly allocated pages, which violates the numa_realloc() semantics.
*/
return mem;
}
void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
{
char *mem;
struct bitmask bitmask;
mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
0, 0);
if (mem == (char *)-1)
return NULL;
bitmask.maskp = (unsigned long *)mask;
	bitmask.size = sizeof(nodemask_t) * 8;
dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
return mem;
}
backward_symver(numa_alloc_interleaved_subset_v1,numa_alloc_interleaved_subset);
void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp)
{
char *mem;
mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
0, 0);
if (mem == (char *)-1)
return NULL;
dombind(mem, size, MPOL_INTERLEAVE, bmp);
return mem;
}
symver(numa_alloc_interleaved_subset_v2,numa_alloc_interleaved_subset);
make_internal_alias(numa_alloc_interleaved_subset_v1);
make_internal_alias(numa_alloc_interleaved_subset_v2);
void *
numa_alloc_interleaved(size_t size)
{
return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr);
}
/*
* given a user node mask, set memory policy to use those nodes
*/
void
numa_set_interleave_mask_v1(nodemask_t *mask)
{
struct bitmask *bmp;
int nnodes = numa_max_possible_node_v1_int()+1;
bmp = numa_bitmask_alloc(nnodes);
copy_nodemask_to_bitmask(mask, bmp);
if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
setpol(MPOL_DEFAULT, bmp);
else
setpol(MPOL_INTERLEAVE, bmp);
numa_bitmask_free(bmp);
}
backward_symver(numa_set_interleave_mask_v1,numa_set_interleave_mask);
void
numa_set_interleave_mask_v2(struct bitmask *bmp)
{
if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
setpol(MPOL_DEFAULT, bmp);
else
setpol(MPOL_INTERLEAVE, bmp);
}
symver(numa_set_interleave_mask_v2,numa_set_interleave_mask);
nodemask_t
numa_get_interleave_mask_v1(void)
{
int oldpolicy = -1;
struct bitmask *bmp;
nodemask_t mask;
bmp = allocate_nodemask_v1();
getpol(&oldpolicy, bmp);
if (oldpolicy == MPOL_INTERLEAVE)
copy_bitmask_to_nodemask(bmp, &mask);
else
copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask);
numa_bitmask_free(bmp);
return mask;
}
backward_symver(numa_get_interleave_mask_v1,numa_get_interleave_mask);
struct bitmask *
numa_get_interleave_mask_v2(void)
{
int oldpolicy = -1;
struct bitmask *bmp;
bmp = numa_allocate_nodemask();
getpol(&oldpolicy, bmp);
if (oldpolicy != MPOL_INTERLEAVE)
copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
return bmp;
}
symver(numa_get_interleave_mask_v2,numa_get_interleave_mask);
/* (undocumented) */
int numa_get_interleave_node(void)
{
int nd;
if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0)
return nd;
return 0;
}
void *numa_alloc_onnode(size_t size, int node)
{
char *mem;
struct bitmask *bmp;
bmp = numa_allocate_nodemask();
numa_bitmask_setbit(bmp, node);
mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
0, 0);
if (mem == (char *)-1)
mem = NULL;
else
dombind(mem, size, bind_policy, bmp);
numa_bitmask_free(bmp);
return mem;
}
void *numa_alloc_local(size_t size)
{
char *mem;
mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
0, 0);
if (mem == (char *)-1)
mem = NULL;
else
dombind(mem, size, MPOL_PREFERRED, NULL);
return mem;
}
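/*
 * Typical caller-side use of the allocation helpers above (a sketch that
 * assumes node 0 exists):
 *
 *	if (numa_available() < 0)
 *		return;
 *	void *p = numa_alloc_onnode(1 << 20, 0);
 *	if (p) {
 *		memset(p, 0, 1 << 20);
 *		numa_free(p, 1 << 20);
 *	}
 */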
void numa_set_bind_policy(int strict)
{
if (strict)
bind_policy = MPOL_BIND;
else
bind_policy = MPOL_PREFERRED;
}
void
numa_set_membind_v1(const nodemask_t *mask)
{
struct bitmask bitmask;
bitmask.maskp = (unsigned long *)mask;
	bitmask.size = sizeof(nodemask_t) * 8;
setpol(MPOL_BIND, &bitmask);
}
backward_symver(numa_set_membind_v1,numa_set_membind);
void
numa_set_membind_v2(struct bitmask *bmp)
{
setpol(MPOL_BIND, bmp);
}
symver(numa_set_membind_v2,numa_set_membind);
make_internal_alias(numa_set_membind_v2);
/*
* copy a bitmask map body to a numa.h nodemask_t structure
*/
void
copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp)
{
int max, i;
memset(nmp, 0, sizeof(nodemask_t));
max = (sizeof(nodemask_t)*8);
for (i=0; i<bmp->size; i++) {
if (i >= max)
break;
if (numa_bitmask_isbitset(bmp, i))
nodemask_set_compat((nodemask_t *)nmp, i);
}
}
/*
* copy a bitmask map body to another bitmask body
* fill a larger destination with zeroes
*/
void
copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto)
{
int bytes;
if (bmpfrom->size >= bmpto->size) {
memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size));
} else if (bmpfrom->size < bmpto->size) {
bytes = CPU_BYTES(bmpfrom->size);
memcpy(bmpto->maskp, bmpfrom->maskp, bytes);
memset(((char *)bmpto->maskp)+bytes, 0,
CPU_BYTES(bmpto->size)-bytes);
}
}
/*
* copy a numa.h nodemask_t structure to a bitmask map body
*/
void
copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp)
{
int max, i;
numa_bitmask_clearall(bmp);
max = (sizeof(nodemask_t)*8);
if (max > bmp->size)
max = bmp->size;
for (i=0; i<max; i++) {
if (nodemask_isset_compat(nmp, i))
numa_bitmask_setbit(bmp, i);
}
}
nodemask_t
numa_get_membind_v1(void)
{
int oldpolicy = -1;
struct bitmask *bmp;
nodemask_t nmp;
bmp = allocate_nodemask_v1();
getpol(&oldpolicy, bmp);
if (oldpolicy == MPOL_BIND) {
copy_bitmask_to_nodemask(bmp, &nmp);
} else {
/* copy the body of the map to numa_all_nodes */
copy_bitmask_to_nodemask(bmp, &numa_all_nodes);
nmp = numa_all_nodes;
}
numa_bitmask_free(bmp);
return nmp;
}
backward_symver(numa_get_membind_v1,numa_get_membind);
struct bitmask *
numa_get_membind_v2(void)
{
int oldpolicy = -1;
struct bitmask *bmp;
bmp = numa_allocate_nodemask();
getpol(&oldpolicy, bmp);
if (oldpolicy != MPOL_BIND)
copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp);
return bmp;
}
symver(numa_get_membind_v2,numa_get_membind);
//TODO: do we need a v1 nodemask_t version?
struct bitmask *numa_get_mems_allowed(void)
{
struct bitmask *bmp;
/*
* can change, so query on each call.
*/
bmp = numa_allocate_nodemask();
if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0,
MPOL_F_MEMS_ALLOWED) < 0)
numa_error("get_mempolicy");
return bmp;
}
make_internal_alias(numa_get_mems_allowed);
void numa_free(void *mem, size_t size)
{
munmap(mem, size);
}
int
numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus)
{
int i;
char *p = strchr(line, '\n');
if (!p)
return -1;
for (i = 0; p > line;i++) {
char *oldp, *endp;
oldp = p;
if (*p == ',')
--p;
while (p > line && *p != ',')
--p;
/* Eat two 32bit fields at a time to get longs */
if (p > line && sizeof(unsigned long) == 8) {
oldp--;
memmove(p, p+1, oldp-p+1);
while (p > line && *p != ',')
--p;
}
if (*p == ',')
p++;
if (i >= CPU_LONGS(ncpus))
return -1;
mask[i] = strtoul(p, &endp, 16);
if (endp != oldp)
return -1;
p--;
}
return 0;
}
backward_symver(numa_parse_bitmap_v1,numa_parse_bitmap);
int
numa_parse_bitmap_v2(char *line, struct bitmask *mask)
{
int i, ncpus;
char *p = strchr(line, '\n');
if (!p)
return -1;
ncpus = mask->size;
for (i = 0; p > line;i++) {
char *oldp, *endp;
oldp = p;
if (*p == ',')
--p;
while (p > line && *p != ',')
--p;
/* Eat two 32bit fields at a time to get longs */
if (p > line && sizeof(unsigned long) == 8) {
oldp--;
memmove(p, p+1, oldp-p+1);
while (p > line && *p != ',')
--p;
}
if (*p == ',')
p++;
if (i >= CPU_LONGS(ncpus))
return -1;
mask->maskp[i] = strtoul(p, &endp, 16);
if (endp != oldp)
return -1;
p--;
}
return 0;
}
symver(numa_parse_bitmap_v2,numa_parse_bitmap);
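/*
 * The sysfs cpumap files parsed by the two functions above use the same
 * comma-separated hex format, most significant 32-bit group first. For
 * example, a node owning cpus 0-3 on a 64-cpu machine would show up as:
 *
 *	00000000,0000000f
 */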
static void
init_node_cpu_mask_v2(void)
{
int nnodes = numa_max_possible_node_v2_int() + 1;
node_cpu_mask_v2 = calloc (nnodes, sizeof(struct bitmask *));
}
/* This would be better with some locking, but I don't want to make libnuma
dependent on pthreads right now. The races are relatively harmless. */
int
numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
{
int err = 0;
char fn[64];
FILE *f;
char *line = NULL;
size_t len = 0;
struct bitmask bitmask;
int buflen_needed;
unsigned long *mask;
int ncpus = numa_num_possible_cpus();
int maxnode = numa_max_node_int();
buflen_needed = CPU_BYTES(ncpus);
if ((unsigned)node > maxnode || bufferlen < buflen_needed) {
errno = ERANGE;
return -1;
}
if (bufferlen > buflen_needed)
memset(buffer, 0, bufferlen);
if (node_cpu_mask_v1[node]) {
memcpy(buffer, node_cpu_mask_v1[node], buflen_needed);
return 0;
}
mask = malloc(buflen_needed);
if (!mask)
mask = (unsigned long *)buffer;
memset(mask, 0, buflen_needed);
sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
f = fopen(fn, "r");
if (!f || getdelim(&line, &len, '\n', f) < 1) {
if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
numa_warn(W_nosysfs2,
"/sys not mounted or invalid. Assuming one node: %s",
strerror(errno));
numa_warn(W_nosysfs2,
"(cannot open or correctly parse %s)", fn);
}
bitmask.maskp = (unsigned long *)mask;
bitmask.size = buflen_needed * 8;
numa_bitmask_setall(&bitmask);
err = -1;
}
if (f)
fclose(f);
if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) {
numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
bitmask.maskp = (unsigned long *)mask;
bitmask.size = buflen_needed * 8;
numa_bitmask_setall(&bitmask);
err = -1;
}
free(line);
memcpy(buffer, mask, buflen_needed);
/* slightly racy, see above */
if (node_cpu_mask_v1[node]) {
if (mask != buffer)
free(mask);
} else {
node_cpu_mask_v1[node] = mask;
}
return err;
}
backward_symver(numa_node_to_cpus_v1,numa_node_to_cpus);
/*
 * Deliver a bitmask of cpus representing the cpus on a given node.
 * This would be better with some locking, but I don't want to make libnuma
 * dependent on pthreads right now. The races are relatively harmless.
 */
int
numa_node_to_cpus_v2(int node, struct bitmask *buffer)
{
int err = 0;
int nnodes = numa_max_node();
char fn[64], *line = NULL;
FILE *f;
size_t len = 0;
struct bitmask *mask;
if (!node_cpu_mask_v2)
init_node_cpu_mask_v2();
if (node > nnodes) {
errno = ERANGE;
return -1;
}
numa_bitmask_clearall(buffer);
if (node_cpu_mask_v2[node]) {
/* have already constructed a mask for this node */
if (buffer->size < node_cpu_mask_v2[node]->size) {
errno = EINVAL;
numa_error("map size mismatch");
return -1;
}
copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer);
return 0;
}
/* need a new mask for this node */
mask = numa_allocate_cpumask();
/* this is a kernel cpumask_t (see node_read_cpumap()) */
sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
f = fopen(fn, "r");
if (!f || getdelim(&line, &len, '\n', f) < 1) {
if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
numa_warn(W_nosysfs2,
"/sys not mounted or invalid. Assuming one node: %s",
strerror(errno));
numa_warn(W_nosysfs2,
"(cannot open or correctly parse %s)", fn);
}
numa_bitmask_setall(mask);
err = -1;
}
if (f)
fclose(f);
if (line && (numa_parse_bitmap_v2(line, mask) < 0)) {
numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
numa_bitmask_setall(mask);
err = -1;
}
free(line);
copy_bitmask_to_bitmask(mask, buffer);
/* slightly racy, see above */
/* save the mask we created */
if (node_cpu_mask_v2[node]) {
/* how could this be? */
if (mask != buffer)
numa_bitmask_free(mask);
} else {
/* we don't want to cache faulty result */
if (!err)
node_cpu_mask_v2[node] = mask;
else
numa_bitmask_free(mask);
}
return err;
}
symver(numa_node_to_cpus_v2,numa_node_to_cpus);
make_internal_alias(numa_node_to_cpus_v1);
make_internal_alias(numa_node_to_cpus_v2);
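/*
 * Illustrative caller-side use of the v2 interface (a sketch, assuming
 * node 0 exists):
 *
 *	struct bitmask *cpus = numa_allocate_cpumask();
 *	if (numa_node_to_cpus(0, cpus) == 0)
 *		printf("node 0 has %u cpus\n", numa_bitmask_weight(cpus));
 *	numa_bitmask_free(cpus);
 */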
/* report the node of the specified cpu */
int numa_node_of_cpu(int cpu)
{
struct bitmask *bmp;
int ncpus, nnodes, node, ret;
ncpus = numa_num_possible_cpus();
	if (cpu >= ncpus){
errno = EINVAL;
return -1;
}
bmp = numa_bitmask_alloc(ncpus);
nnodes = numa_max_node();
for (node = 0; node <= nnodes; node++){
if (numa_node_to_cpus_v2_int(node, bmp) < 0) {
/* It's possible for the node to not exist */
continue;
}
if (numa_bitmask_isbitset(bmp, cpu)){
ret = node;
goto end;
}
}
ret = -1;
errno = EINVAL;
end:
numa_bitmask_free(bmp);
return ret;
}
int
numa_run_on_node_mask_v1(const nodemask_t *mask)
{
int ncpus = numa_num_possible_cpus();
int i, k, err;
unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)];
memset(cpus, 0, CPU_BYTES(ncpus));
for (i = 0; i < NUMA_NUM_NODES; i++) {
if (mask->n[i / BITS_PER_LONG] == 0)
continue;
if (nodemask_isset_compat(mask, i)) {
if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) {
numa_warn(W_noderunmask,
"Cannot read node cpumask from sysfs");
continue;
}
for (k = 0; k < CPU_LONGS(ncpus); k++)
cpus[k] |= nodecpus[k];
}
}
err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus);
/* The sched_setaffinity API is broken because it expects
the user to guess the kernel cpuset size. Do this in a
brute force way. */
if (err < 0 && errno == EINVAL) {
int savederrno = errno;
char *bigbuf;
static int size = -1;
if (size == -1)
size = CPU_BYTES(ncpus) * 2;
bigbuf = malloc(CPU_BUFFER_SIZE);
if (!bigbuf) {
errno = ENOMEM;
return -1;
}
errno = savederrno;
while (size <= CPU_BUFFER_SIZE) {
memcpy(bigbuf, cpus, CPU_BYTES(ncpus));
memset(bigbuf + CPU_BYTES(ncpus), 0,
CPU_BUFFER_SIZE - CPU_BYTES(ncpus));
err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf);
if (err == 0 || errno != EINVAL)
break;
size *= 2;
}
savederrno = errno;
free(bigbuf);
errno = savederrno;
}
return err;
}
backward_symver(numa_run_on_node_mask_v1,numa_run_on_node_mask);
/*
* Given a node mask (size of a kernel nodemask_t) (probably populated by
* a user argument list) set up a map of cpus (map "cpus") on those nodes.
* Then set affinity to those cpus.
*/
int
numa_run_on_node_mask_v2(struct bitmask *bmp)
{
int ncpus, i, k, err;
struct bitmask *cpus, *nodecpus;
cpus = numa_allocate_cpumask();
ncpus = cpus->size;
nodecpus = numa_allocate_cpumask();
for (i = 0; i < bmp->size; i++) {
if (bmp->maskp[i / BITS_PER_LONG] == 0)
continue;
if (numa_bitmask_isbitset(bmp, i)) {
/*
* numa_all_nodes_ptr is cpuset aware; use only
* these nodes
*/
if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
numa_warn(W_noderunmask,
"node %d not allowed", i);
continue;
}
if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
numa_warn(W_noderunmask,
"Cannot read node cpumask from sysfs");
continue;
}
for (k = 0; k < CPU_LONGS(ncpus); k++)
cpus->maskp[k] |= nodecpus->maskp[k];
}
}
err = numa_sched_setaffinity_v2_int(0, cpus);
numa_bitmask_free(cpus);
numa_bitmask_free(nodecpus);
/* used to have to consider that this could fail - it shouldn't now */
if (err < 0) {
numa_error("numa_sched_setaffinity_v2_int() failed; abort\n");
}
return err;
}
symver(numa_run_on_node_mask_v2,numa_run_on_node_mask);
make_internal_alias(numa_run_on_node_mask_v2);
/*
* Given a node mask (size of a kernel nodemask_t) (probably populated by
* a user argument list) set up a map of cpus (map "cpus") on those nodes
* without any cpuset awareness. Then set affinity to those cpus.
*/
int
numa_run_on_node_mask_all(struct bitmask *bmp)
{
int ncpus, i, k, err;
struct bitmask *cpus, *nodecpus;
cpus = numa_allocate_cpumask();
ncpus = cpus->size;
nodecpus = numa_allocate_cpumask();
for (i = 0; i < bmp->size; i++) {
if (bmp->maskp[i / BITS_PER_LONG] == 0)
continue;
if (numa_bitmask_isbitset(bmp, i)) {
if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) {
numa_warn(W_noderunmask,
"node %d not allowed", i);
continue;
}
if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
numa_warn(W_noderunmask,
"Cannot read node cpumask from sysfs");
continue;
}
for (k = 0; k < CPU_LONGS(ncpus); k++)
cpus->maskp[k] |= nodecpus->maskp[k];
}
}
err = numa_sched_setaffinity_v2_int(0, cpus);
numa_bitmask_free(cpus);
numa_bitmask_free(nodecpus);
	/* Since any possible node may be requested here, a failure can easily happen now */
if (err < 0) {
numa_error("numa_sched_setaffinity_v2_int() failed");
}
return err;
}
nodemask_t
numa_get_run_node_mask_v1(void)
{
int ncpus = numa_num_configured_cpus();
int i, k;
int max = numa_max_node_int();
struct bitmask *bmp, *cpus, *nodecpus;
nodemask_t nmp;
cpus = numa_allocate_cpumask();
if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
nmp = numa_no_nodes;
goto free_cpus;
}
nodecpus = numa_allocate_cpumask();
bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */
for (i = 0; i <= max; i++) {
if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
/* It's possible for the node to not exist */
continue;
}
for (k = 0; k < CPU_LONGS(ncpus); k++) {
if (nodecpus->maskp[k] & cpus->maskp[k])
numa_bitmask_setbit(bmp, i);
}
}
copy_bitmask_to_nodemask(bmp, &nmp);
numa_bitmask_free(bmp);
numa_bitmask_free(nodecpus);
free_cpus:
numa_bitmask_free(cpus);
return nmp;
}
backward_symver(numa_get_run_node_mask_v1,numa_get_run_node_mask);
struct bitmask *
numa_get_run_node_mask_v2(void)
{
int i, k;
int ncpus = numa_num_configured_cpus();
int max = numa_max_node_int();
struct bitmask *bmp, *cpus, *nodecpus;
bmp = numa_allocate_cpumask();
cpus = numa_allocate_cpumask();
if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
goto free_cpus;
}
nodecpus = numa_allocate_cpumask();
for (i = 0; i <= max; i++) {
/*
* numa_all_nodes_ptr is cpuset aware; show only
* these nodes
*/
if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
continue;
}
if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
/* It's possible for the node to not exist */
continue;
}
for (k = 0; k < CPU_LONGS(ncpus); k++) {
if (nodecpus->maskp[k] & cpus->maskp[k])
numa_bitmask_setbit(bmp, i);
}
}
numa_bitmask_free(nodecpus);
free_cpus:
numa_bitmask_free(cpus);
return bmp;
}
symver(numa_get_run_node_mask_v2,numa_get_run_node_mask);
int
numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes)
{
int numa_num_nodes = numa_num_possible_nodes();
return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp,
tonodes->maskp);
}
int numa_move_pages(int pid, unsigned long count,
void **pages, const int *nodes, int *status, int flags)
{
return move_pages(pid, count, pages, nodes, status, flags);
}
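/*
 * Minimal sketch of moving one page of the calling process to node 0 with
 * the wrapper above ("addr" is an assumed, page-aligned address):
 *
 *	void *pages[1] = { addr };
 *	int nodes[1] = { 0 };
 *	int status[1];
 *	if (numa_move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) == 0)
 *		printf("page is now on node %d\n", status[0]);
 */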
int numa_run_on_node(int node)
{
int numa_num_nodes = numa_num_possible_nodes();
int ret = -1;
struct bitmask *cpus;
if (node >= numa_num_nodes){
errno = EINVAL;
goto out;
}
cpus = numa_allocate_cpumask();
if (node == -1)
numa_bitmask_setall(cpus);
else if (numa_node_to_cpus_v2_int(node, cpus) < 0){
numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs");
goto free;
}
ret = numa_sched_setaffinity_v2_int(0, cpus);
free:
numa_bitmask_free(cpus);
out:
return ret;
}
int numa_preferred(void)
{
int policy;
int ret;
struct bitmask *bmp;
bmp = numa_allocate_nodemask();
getpol(&policy, bmp);
if (policy == MPOL_PREFERRED || policy == MPOL_BIND) {
int i;
int max = numa_num_possible_nodes();
for (i = 0; i < max ; i++)
if (numa_bitmask_isbitset(bmp, i)){
ret = i;
goto end;
}
}
/* could read the current CPU from /proc/self/status. Probably
not worth it. */
ret = 0; /* or random one? */
end:
numa_bitmask_free(bmp);
return ret;
}
void numa_set_preferred(int node)
{
struct bitmask *bmp;
bmp = numa_allocate_nodemask();
if (node >= 0) {
numa_bitmask_setbit(bmp, node);
setpol(MPOL_PREFERRED, bmp);
} else
setpol(MPOL_DEFAULT, bmp);
numa_bitmask_free(bmp);
}
void numa_set_localalloc(void)
{
setpol(MPOL_DEFAULT, numa_no_nodes_ptr);
}
void numa_bind_v1(const nodemask_t *nodemask)
{
struct bitmask bitmask;
bitmask.maskp = (unsigned long *)nodemask;
	bitmask.size = sizeof(nodemask_t) * 8;
numa_run_on_node_mask_v2_int(&bitmask);
numa_set_membind_v2_int(&bitmask);
}
backward_symver(numa_bind_v1,numa_bind);
void numa_bind_v2(struct bitmask *bmp)
{
numa_run_on_node_mask_v2_int(bmp);
numa_set_membind_v2_int(bmp);
}
symver(numa_bind_v2,numa_bind);
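/*
 * Typical caller-side use of numa_bind() (a sketch; the node string is an
 * assumed example and parsing may fail):
 *
 *	struct bitmask *nodes = numa_parse_nodestring("0-1");
 *	if (nodes) {
 *		numa_bind(nodes);
 *		numa_bitmask_free(nodes);
 *	}
 */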
void numa_set_strict(int flag)
{
if (flag)
mbind_flags |= MPOL_MF_STRICT;
else
mbind_flags &= ~MPOL_MF_STRICT;
}
/*
* Extract a node or processor number from the given string.
* Allow a relative node / processor specification within the allowed
* set if "relative" is nonzero
*/
static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative)
{
long i, nr;
if (!relative)
return strtoul(s, end, 0);
nr = strtoul(s, end, 0);
if (s == *end)
return nr;
/* Find the nth set bit */
for (i = 0; nr >= 0 && i <= bmp->size; i++)
if (numa_bitmask_isbitset(bmp, i))
nr--;
return i-1;
}
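/*
 * Example: if the allowed set contains nodes {2, 5, 7}, a relative
 * specification of "+1" (relative index 1, i.e. the second allowed entry)
 * resolves to node 5.
 */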
/*
* __numa_parse_nodestring() is called to create a node mask, given
* an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10.
* (the + indicates that the numbers are nodeset-relative)
*
* The nodes may be specified as absolute, or relative to the current nodeset.
* The list of available nodes is in a map pointed to by "allowed_nodes_ptr",
* which may represent all nodes or the nodes in the current nodeset.
*
* The caller must free the returned bitmask.
*/
static struct bitmask *
__numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr)
{
int invert = 0, relative = 0;
int conf_nodes = numa_num_configured_nodes();
char *end;
struct bitmask *mask;
mask = numa_allocate_nodemask();
if (s[0] == 0){
copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask);
return mask; /* return freeable mask */
}
if (*s == '!') {
invert = 1;
s++;
}
if (*s == '+') {
relative++;
s++;
}
do {
unsigned long arg;
int i;
if (isalpha(*s)) {
int n;
if (!strcmp(s,"all")) {
copy_bitmask_to_bitmask(allowed_nodes_ptr,
mask);
s+=4;
break;
}
n = resolve_affinity(s, mask);
if (n != NO_IO_AFFINITY) {
if (n < 0)
goto err;
s += strlen(s) + 1;
break;
}
}
arg = get_nr(s, &end, allowed_nodes_ptr, relative);
if (end == s) {
numa_warn(W_nodeparse, "unparseable node description `%s'\n", s);
goto err;
}
if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) {
numa_warn(W_nodeparse, "node argument %d is out of range\n", arg);
goto err;
}
i = arg;
numa_bitmask_setbit(mask, i);
s = end;
if (*s == '-') {
char *end2;
unsigned long arg2;
arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative);
if (end2 == s) {
numa_warn(W_nodeparse, "missing node argument %s\n", s);
goto err;
}
if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) {
numa_warn(W_nodeparse, "node argument %d out of range\n", arg2);
goto err;
}
while (arg <= arg2) {
i = arg;
if (numa_bitmask_isbitset(allowed_nodes_ptr,i))
numa_bitmask_setbit(mask, i);
arg++;
}
s = end2;
}
} while (*s++ == ',');
if (s[-1] != '\0')
goto err;
if (invert) {
int i;
for (i = 0; i < conf_nodes; i++) {
if (numa_bitmask_isbitset(mask, i))
numa_bitmask_clearbit(mask, i);
else
numa_bitmask_setbit(mask, i);
}
}
return mask;
err:
numa_bitmask_free(mask);
return NULL;
}
/*
* numa_parse_nodestring() is called to create a bitmask from nodes available
* for this task.
*/
struct bitmask * numa_parse_nodestring(const char *s)
{
return __numa_parse_nodestring(s, numa_all_nodes_ptr);
}
/*
* numa_parse_nodestring_all() is called to create a bitmask from all nodes
* available.
*/
struct bitmask * numa_parse_nodestring_all(const char *s)
{
return __numa_parse_nodestring(s, numa_possible_nodes_ptr);
}
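/*
 * Illustrative use of the node-string parsers above (a sketch):
 *
 *	struct bitmask *mask = numa_parse_nodestring("1,3");
 *	if (mask) {
 *		... bits 1 and 3 are set, provided both nodes are allowed ...
 *		numa_bitmask_free(mask);
 *	}
 */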
/*
* __numa_parse_cpustring() is called to create a bitmask, given
* an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10.
* (the + indicates that the numbers are cpuset-relative)
*
* The cpus may be specified as absolute, or relative to the current cpuset.
* The list of available cpus for this task is in the map pointed to by
* "allowed_cpus_ptr", which may represent all cpus or the cpus in the
* current cpuset.
*
* The caller must free the returned bitmask.
*/
static struct bitmask *
__numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr)
{
int invert = 0, relative=0;
int conf_cpus = numa_num_configured_cpus();
char *end;
struct bitmask *mask;
mask = numa_allocate_cpumask();
if (s[0] == 0)
return mask;
if (*s == '!') {
invert = 1;
s++;
}
if (*s == '+') {
relative++;
s++;
}
do {
unsigned long arg;
int i;
if (!strcmp(s,"all")) {
copy_bitmask_to_bitmask(allowed_cpus_ptr, mask);
s+=4;
break;
}
arg = get_nr(s, &end, allowed_cpus_ptr, relative);
if (end == s) {
numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s);
goto err;
}
if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) {
numa_warn(W_cpuparse, "cpu argument %s is out of range\n", s);
goto err;
}
i = arg;
numa_bitmask_setbit(mask, i);
s = end;
if (*s == '-') {
char *end2;
unsigned long arg2;
int i;
arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative);
if (end2 == s) {
numa_warn(W_cpuparse, "missing cpu argument %s\n", s);
goto err;
}
if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) {
numa_warn(W_cpuparse, "cpu argument %s out of range\n", s);
goto err;
}
while (arg <= arg2) {
i = arg;
if (numa_bitmask_isbitset(allowed_cpus_ptr, i))
numa_bitmask_setbit(mask, i);
arg++;
}
s = end2;
}
} while (*s++ == ',');
if (s[-1] != '\0')
goto err;
if (invert) {
int i;
for (i = 0; i < conf_cpus; i++) {
if (numa_bitmask_isbitset(mask, i))
numa_bitmask_clearbit(mask, i);
else
numa_bitmask_setbit(mask, i);
}
}
return mask;
err:
numa_bitmask_free(mask);
return NULL;
}
/*
* numa_parse_cpustring() is called to create a bitmask from cpus available
* for this task.
*/
struct bitmask * numa_parse_cpustring(const char *s)
{
return __numa_parse_cpustring(s, numa_all_cpus_ptr);
}
/*
* numa_parse_cpustring_all() is called to create a bitmask from all cpus
* available.
*/
struct bitmask * numa_parse_cpustring_all(const char *s)
{
return __numa_parse_cpustring(s, numa_possible_cpus_ptr);
}