/* blob: a2b2d9d175eef3db00bd4fd197488d8dfce7bed0 [file] [log] [blame] */
/* Copyright (C) 2003,2004,2005 Andi Kleen, SuSE Labs.
Command line NUMA policy control.
numactl is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; version
2.
numactl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should find a copy of v2 of the GNU General Public License somewhere
on your Linux system; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#define _GNU_SOURCE
#include <getopt.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <ctype.h>
#include "numa.h"
#include "numaif.h"
#include "numaint.h"
#include "util.h"
#include "shm.h"
/* Parse modes passed as the `flag` argument of numactl_parse_nodestring():
   ALL selects numa_parse_nodestring_all(), any other value (CPUSET) selects
   numa_parse_nodestring(). */
#define CPUSET 0
#define ALL 1
/* Status handed to exit() at the end of main() when shmfd >= 0.  It is never
   assigned in this file; presumably it is updated elsewhere -- TODO confirm. */
int exitcode;
/* Long option table handed to getopt_long() in main().
   Fields are { name, has_arg, flag, val }.  The val of every entry is the
   character main() switches on; get_short_opts() derives the short option
   string from this same table, so each printable val is also accepted as a
   single-letter option.  The all-zero entry terminates the table. */
struct option opts[] = {
{"all", 0, 0, 'a'},
{"interleave", 1, 0, 'i' },
{"preferred", 1, 0, 'p' },
{"cpubind", 1, 0, 'c' },
{"cpunodebind", 1, 0, 'N' },
{"physcpubind", 1, 0, 'C' },
{"membind", 1, 0, 'm'},
{"show", 0, 0, 's' },
{"localalloc", 0,0, 'l'},
{"hardware", 0,0,'H' },
{"shm", 1, 0, 'S'},
{"file", 1, 0, 'f'},
{"offset", 1, 0, 'o'},
{"length", 1, 0, 'L'},
{"strict", 0, 0, 't'},
{"shmmode", 1, 0, 'M'},
{"dump", 0, 0, 'd'},
{"dump-nodes", 0, 0, 'D'},
{"shmid", 1, 0, 'I'},
{"huge", 0, 0, 'u'},
{"touch", 0, 0, 'T'},
{"verify", 0, 0, 'V'}, /* undocumented - for debugging */
{ 0 }
};
/* Print the command line synopsis to stderr and terminate with exit status 1.
   Never returns. */
void usage(void)
{
	/* Note: the short form of --length is -L; -l is --localalloc
	   (see the opts table). */
	fprintf(stderr,
"usage: numactl [--all | -a] [--interleave= | -i <nodes>] [--preferred= | -p <node>]\n"
" [--physcpubind= | -C <cpus>] [--cpunodebind= | -N <nodes>]\n"
" [--membind= | -m <nodes>] [--localalloc | -l] command args ...\n"
" numactl [--show | -s]\n"
" numactl [--hardware | -H]\n"
" numactl [--length | -L <length>] [--offset | -o <offset>] [--shmmode | -M <shmmode>]\n"
" [--strict | -t]\n"
" [--shmid | -I <id>] --shm | -S <shmkeyfile>\n"
" [--shmid | -I <id>] --file | -f <tmpfsfile>\n"
" [--huge | -u] [--touch | -T] \n"
" memory policy | --dump | -d | --dump-nodes | -D\n"
"\n"
"memory policy is --interleave | -i, --preferred | -p, --membind | -m, --localalloc | -l\n"
"<nodes> is a comma delimited list of node numbers or A-B ranges or all.\n"
"Instead of a number a node can also be:\n"
" netdev:DEV the node connected to network device DEV\n"
" file:PATH the node the block device of path is connected to\n"
" ip:HOST the node of the network device host routes through\n"
" block:PATH the node of block device path\n"
" pci:[seg:]bus:dev[:func] The node of a PCI device\n"
"<cpus> is a comma delimited list of cpu numbers or A-B ranges or all\n"
"all ranges can be inverted with !\n"
"all numbers and ranges can be made cpuset-relative with +\n"
"the old --cpubind argument is deprecated.\n"
"use --cpunodebind or --physcpubind instead\n"
"<length> can have g (GB), m (MB) or k (KB) suffixes\n");
	exit(1);
}
/* Print "numactl: " followed by the printf-style message MSG and a newline
   to stderr, then print the usage text via usage(), which exits.
   Never returns. */
void usage_msg(char *msg, ...)
{
	va_list ap;

	va_start(ap, msg);
	fprintf(stderr, "numactl: ");
	vfprintf(stderr, msg, ap);
	va_end(ap);
	/* keep the terminating newline on the same stream as the message */
	fputc('\n', stderr);
	usage();
}
/* Print the CPU affinity mask of the calling process as "physcpubind".
   The mask starts at numa_num_configured_cpus() bits and is doubled for as
   long as numa_sched_getaffinity() fails with EINVAL, up to 1024*1024 bits. */
void show_physcpubind(void)
{
	int ncpus = numa_num_configured_cpus();

	for (;;) {
		struct bitmask *cpubuf;

		cpubuf = numa_bitmask_alloc(ncpus);
		if (numa_sched_getaffinity(0, cpubuf) < 0) {
			if (errno == EINVAL && ncpus < 1024*1024) {
				/* mask too small: release it and retry
				   with twice as many bits */
				numa_bitmask_free(cpubuf);
				ncpus *= 2;
				continue;
			}
			/* errno is left untouched for err() */
			err("sched_get_affinity");
		}
		printmask("physcpubind", cpubuf);
		numa_bitmask_free(cpubuf);
		break;
	}
}
/* Print the NUMA policy state of the current process: policy name, preferred
   node, interleave information (only for MPOL_INTERLEAVE), the physical cpu
   binding and the node and memory bind masks.
   Without NUMA support only the cpu binding is printed, followed by a notice,
   and the program exits with status 1. */
void show(void)
{
	struct bitmask *membind, *interleave, *cpubind;
	/* signed, so they match the %ld conversions and the -1 comparison below */
	long prefnode;
	long cur;
	int policy;

	if (numa_available() < 0) {
		show_physcpubind();
		printf("No NUMA support available on this system.\n");
		exit(1);
	}
	cpubind = numa_get_run_node_mask();
	prefnode = numa_preferred();
	interleave = numa_get_interleave_mask();
	membind = numa_get_membind();
	cur = numa_get_interleave_node();
	policy = 0;
	/* on failure the error is reported and policy stays 0 */
	if (get_mempolicy(&policy, NULL, 0, 0, 0) < 0)
		perror("get_mempolicy");
	printf("policy: %s\n", policy_name(policy));
	printf("preferred node: ");
	switch (policy) {
	case MPOL_PREFERRED:
		if (prefnode != -1) {
			printf("%ld\n", prefnode);
			break;
		}
		/*FALL THROUGH*/
	case MPOL_DEFAULT:
		printf("current\n");
		break;
	case MPOL_INTERLEAVE:
		printf("%ld (interleave next)\n",cur);
		break;
	case MPOL_BIND:
		printf("%d\n", find_first(membind));
		break;
	default:
		/* policy not handled above: still terminate the
		   "preferred node:" line */
		printf("\n");
		break;
	}
	if (policy == MPOL_INTERLEAVE) {
		printmask("interleavemask", interleave);
		printf("interleavenode: %ld\n", cur);
	}
	show_physcpubind();
	printmask("cpubind", cpubind); // for compatibility
	printmask("nodebind", cpubind);
	printmask("membind", membind);
}
/* Format the byte count MEM into BUF as whole megabytes ("<n> MB").
   The all-ones value (-1 converted to unsigned long long) is rendered as
   "<not available>".  BUF must be large enough for the result; it is also
   the return value. */
char *fmt_mem(unsigned long long mem, char *buf)
{
	const unsigned long long unavailable = (unsigned long long)-1L;

	if (mem != unavailable)
		sprintf(buf, "%llu MB", mem >> 20);
	else
		strcpy(buf, "<not available>");
	return buf;
}
/* Print the node distance matrix for every node set in numa_nodes_ptr, for
   node numbers 0..MAXNODE.  If numa_distance() between MAXNODE and the lowest
   present node is 0, a notice is printed instead of the table. */
static void print_distances(int maxnode)
{
	int row, col;
	int first = 0;

	/* find the lowest node number that is present */
	for (row = 0; row <= maxnode; row++) {
		if (numa_bitmask_isbitset(numa_nodes_ptr, row)) {
			first = row;
			break;
		}
	}

	if (numa_distance(maxnode, first) == 0) {
		printf("No distance information available.\n");
		return;
	}

	/* header line: one column per present node */
	printf("node distances:\n");
	printf("node ");
	for (col = 0; col <= maxnode; col++) {
		if (!numa_bitmask_isbitset(numa_nodes_ptr, col))
			continue;
		printf("% 3d ", col);
	}
	printf("\n");

	/* one row per present node */
	for (row = 0; row <= maxnode; row++) {
		if (!numa_bitmask_isbitset(numa_nodes_ptr, row))
			continue;
		printf("% 3d: ", row);
		for (col = 0; col <= maxnode; col++) {
			if (!numa_bitmask_isbitset(numa_nodes_ptr, col))
				continue;
			printf("% 3d ", numa_distance(row, col));
		}
		printf("\n");
	}
}
/* Print the cpu numbers belonging to NODE, each preceded by a space, followed
   by a newline.  If numa_node_to_cpus() fails only the newline is printed. */
void print_node_cpus(int node)
{
	int i, rc;
	struct bitmask *cpus;

	cpus = numa_allocate_cpumask();
	rc = numa_node_to_cpus(node, cpus);
	if (rc >= 0) {
		for (i = 0; i < cpus->size; i++)
			if (numa_bitmask_isbitset(cpus, i))
				printf(" %d", i);
	}
	putchar('\n');
	/* the mask is only needed for this one listing */
	numa_bitmask_free(cpus);
}
/* Print the hardware inventory: the number of nodes set in numa_nodes_ptr and
   their numbers as a compact list (runs of consecutive numbers are collapsed
   to A-B), then per node its cpus, size and free memory, and finally the
   distance table.  Exits with status 1 if NUMA is not available. */
void hardware(void)
{
	int i;
	int numnodes=0;
	int prevnode=-1;
	int skip=0;	/* nonzero while inside an open "A-" range */
	int maxnode;

	/* numa_available() has to be consulted before any other libnuma call */
	if (numa_available() < 0) {
		printf("No NUMA available on this system\n");
		exit(1);
	}
	maxnode = numa_max_node();

	for (i=0; i<=maxnode; i++)
		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
			numnodes++;
	printf("available: %d nodes (", numnodes);
	for (i=0; i<=maxnode; i++) {
		if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
			continue;
		if (prevnode == -1) {
			/* first present node */
			printf("%d", i);
			prevnode=i;
			continue;
		}
		if (i > prevnode + 1) {
			/* gap: close a pending range, then start a new item */
			if (skip) {
				printf("%d", prevnode);
				skip=0;
			}
			printf(",%d", i);
			prevnode=i;
			continue;
		}
		if (i == prevnode + 1) {
			/* consecutive node: open a range once and extend it */
			if (!skip) {
				printf("-");
				skip=1;
			}
			prevnode=i;
		}
	}
	/* Close a range that is still open.  Doing this after the loop also
	   covers the case where maxnode itself is not set in numa_nodes_ptr. */
	if (skip)
		printf("%d", prevnode);
	printf(")\n");

	for (i = 0; i <= maxnode; i++) {
		char buf[64];
		long long fr = -1;	/* shown as "<not available>" unless filled in */
		unsigned long long sz;

		if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
			continue;
		sz = numa_node_size64(i, &fr);
		printf("node %d cpus:", i);
		print_node_cpus(i);
		printf("node %d size: %s\n", i, fmt_mem(sz, buf));
		printf("node %d free: %s\n", i, fmt_mem(fr, buf));
	}
	print_distances(maxnode);
}
/* If errno is nonzero, report it with perror() prefixed by S and exit with
   status 1; otherwise do nothing.  Callers clear errno before the operation
   they want checked. */
void checkerror(char *s)
{
	if (errno == 0)
		return;

	perror(s);
	exit(1);
}
/* Complain if NUMA policy is not available.  numa_available() is queried
   only on the first call; the static flag suppresses the query afterwards. */
void checknuma(void)
{
	static int numa = -1;

	if (numa < 0 && numa_available() < 0)
		complain("This system does not support NUMA policy");
	numa = 0;
}
/* Memory policy chosen on the command line; -1 while none was given. */
int set_policy = -1;

/* Record POL as the selected memory policy.  A second policy option is
   rejected through usage_msg(). */
void setpolicy(int pol)
{
	if (set_policy != -1) {
		usage_msg("Conflicting policies");
	}
	set_policy = pol;
}
/* Reject the command line through usage_msg() if a memory policy has
   already been selected; nothing happens otherwise. */
void nopolicy(void)
{
	if (set_policy < 0)
		return;
	usage_msg("specify policy after --shm/--file");
}
/* Option-parsing state shared between main() and the check helpers below. */
/* set once a cpu binding option (-c/-N/-C) has been processed */
int did_cpubind = 0;
/* set by --strict; main() warns when it was given for a process */
int did_strict = 0;
/* passed to check_cpubind() by the cpu binding options; never set in this file */
int do_shm = 0;
/* set by --dump and --dump-nodes */
int do_dump = 0;
/* set after --shm or --file has attached a segment */
int shmattached = 0;
/* set once any node or cpu list has been parsed */
int did_node_cpu_parse = 0;
/* set by --all; selects the *_all parser variants */
int parse_all = 0;
/* name of the most recent option recorded by noshm(), NULL if none */
char *shmoption;
/* Reject the command line through usage_msg() when FLAG is nonzero, i.e.
   when cpu binding and shared memory options are combined. */
void check_cpubind(int flag)
{
	if (!flag)
		return;
	usage_msg("cannot do --cpubind on shared memory\n");
}
/* Called for options that must precede the shared memory specification:
   rejects OPT if a segment is already attached, otherwise records OPT in
   shmoption. */
void noshm(char *opt)
{
	if (shmattached != 0) {
		usage_msg("%s must be before shared memory specification", opt);
	}
	shmoption = opt;
}
/* Reject OPT through usage_msg() if an option recorded by noshm() was seen
   earlier on the command line. */
void dontshm(char *opt)
{
	if (shmoption == NULL)
		return;
	usage_msg("%s shm option is not allowed before %s", shmoption, opt);
}
/* Reject OPT through usage_msg() unless a shared memory segment has already
   been attached. */
void needshm(char *opt)
{
	if (shmattached)
		return;
	usage_msg("%s must be after shared memory specification", opt);
}
/* Reject --all/-a through usage_msg() when FLAG is nonzero.  The caller
   passes did_node_cpu_parse, so --all is refused once any cpu or node list
   has been parsed.  The parameter is honored here (it used to be ignored in
   favor of the global), matching check_cpubind(). */
void check_all_parse(int flag)
{
	if (flag)
		usage_msg("--all/-a option must be before all cpu/node specifications");
}
/* Build a getopt short option string in S from the long option table O.
   The string starts with '+'; every entry whose val is a printable character
   contributes that character, followed by ':' when has_arg is nonzero.
   The table ends at the first entry with a NULL name.  S must have room for
   two characters per entry plus the leading '+' and the terminating NUL. */
void get_short_opts(struct option *o, char *s)
{
	struct option *cur;
	char *out = s;

	*out++ = '+';
	for (cur = o; cur->name != NULL; cur++) {
		if (!isprint(cur->val))
			continue;
		*out++ = cur->val;
		if (cur->has_arg)
			*out++ = ':';
	}
	*out = '\0';
}
/* Exit with status 1 and a diagnostic on stderr when shmoffset is not below
   shmlen.  MSG names the option being processed and ends the diagnostic. */
void check_shmbeyond(char *msg)
{
	if (shmoffset < shmlen)
		return;

	fprintf(stderr,
		"numactl: region offset %#llx beyond its length %#llx at %s\n",
		shmoffset, shmlen, msg);
	exit(1);
}
/* Parse the node list S into a bitmask.  The literal "same" repeats the
   previous node specification (rejected through usage_msg() if there was
   none); any other string is remembered for a later "same".
   FLAG == ALL parses with numa_parse_nodestring_all(), any other value with
   numa_parse_nodestring().  Returns whatever the selected parser returns. */
static struct bitmask *numactl_parse_nodestring(char *s, int flag)
{
	static char *last;	/* most recent explicit specification */

	if (strcmp(s, "same") != 0) {
		last = s;
	} else {
		if (last == NULL)
			usage_msg("same needs previous node specification");
		s = last;
	}

	return (flag == ALL) ? numa_parse_nodestring_all(s)
			     : numa_parse_nodestring(s);
}
/* Parse the node list ARG, using the ALL parser if --all was given earlier
   and the CPUSET parser otherwise.  On a parse failure the argument is
   reported and usage() is printed (which exits). */
static struct bitmask *parse_nodes_or_usage(char *arg)
{
	struct bitmask *nodes;

	nodes = numactl_parse_nodestring(arg, parse_all ? ALL : CPUSET);
	if (!nodes) {
		printf ("<%s> is invalid\n", arg);
		usage();
	}
	return nodes;
}

/* Entry point.  Options are processed strictly in command line order (the
   short option string starts with '+'), so each policy option acts either
   on the shared memory segment attached so far (shmfd >= 0) or on the
   process itself.  Once the options are consumed:
     - with a segment attached, no command may follow and the program exits
       with exitcode;
     - otherwise the remaining arguments are executed as a command with the
       policy that was set up. */
int main(int ac, char **av)
{
	int c, i, nnodes=0;
	long node=-1;
	char *end;
	char shortopts[array_len(opts)*2 + 1];
	/* node mask of the most recent policy option; read by --verify */
	struct bitmask *mask = NULL;

	get_short_opts(opts,shortopts);
	while ((c = getopt_long(ac, av, shortopts, opts, NULL)) != -1) {
		switch (c) {
		case 's': /* --show */
			show();
			exit(0);
		case 'H': /* --hardware */
			nopolicy();
			hardware();
			exit(0);
		case 'i': /* --interleave */
			checknuma();
			mask = parse_nodes_or_usage(optarg);
			errno = 0;
			did_node_cpu_parse = 1;
			setpolicy(MPOL_INTERLEAVE);
			if (shmfd >= 0)
				numa_interleave_memory(shmptr, shmlen, mask);
			else
				numa_set_interleave_mask(mask);
			checkerror("setting interleave mask");
			break;
		case 'N': /* --cpunodebind */
		case 'c': /* --cpubind */
			dontshm("-c/--cpubind/--cpunodebind");
			checknuma();
			mask = parse_nodes_or_usage(optarg);
			errno = 0;
			check_cpubind(do_shm);
			did_cpubind = 1;
			did_node_cpu_parse = 1;
			numa_run_on_node_mask_all(mask);
			checkerror("sched_setaffinity");
			break;
		case 'C': /* --physcpubind */
		{
			struct bitmask *cpubuf;

			dontshm("-C/--physcpubind");
			if (parse_all)
				cpubuf = numa_parse_cpustring_all(optarg);
			else
				cpubuf = numa_parse_cpustring(optarg);
			if (!cpubuf) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			check_cpubind(do_shm);
			did_cpubind = 1;
			did_node_cpu_parse = 1;
			numa_sched_setaffinity(0, cpubuf);
			checkerror("sched_setaffinity");
			/* a struct bitmask has to be released with
			   numa_bitmask_free(), not plain free() */
			numa_bitmask_free(cpubuf);
			break;
		}
		case 'm': /* --membind */
			checknuma();
			setpolicy(MPOL_BIND);
			mask = parse_nodes_or_usage(optarg);
			errno = 0;
			did_node_cpu_parse = 1;
			numa_set_bind_policy(1);
			if (shmfd >= 0) {
				numa_tonodemask_memory(shmptr, shmlen, mask);
			} else {
				numa_set_membind(mask);
			}
			numa_set_bind_policy(0);
			checkerror("setting membind");
			break;
		case 'p': /* --preferred */
			checknuma();
			setpolicy(MPOL_PREFERRED);
			mask = parse_nodes_or_usage(optarg);
			for (i=0; i<mask->size; i++) {
				if (numa_bitmask_isbitset(mask, i)) {
					node = i;
					nnodes++;
				}
			}
			/* exactly one preferred node is accepted */
			if (nnodes != 1)
				usage();
			/* mask is deliberately not freed here: a later
			   --verify hands it to verify_shm(), so freeing it
			   would leave a dangling pointer behind */
			errno = 0;
			did_node_cpu_parse = 1;
			numa_set_bind_policy(0);
			if (shmfd >= 0)
				numa_tonode_memory(shmptr, shmlen, node);
			else
				numa_set_preferred(node);
			checkerror("setting preferred node");
			break;
		case 'l': /* --localalloc */
			checknuma();
			setpolicy(MPOL_DEFAULT);
			errno = 0;
			if (shmfd >= 0)
				numa_setlocal_memory(shmptr, shmlen);
			else
				numa_set_localalloc();
			checkerror("local allocation");
			break;
		case 'S': /* --shm */
			check_cpubind(did_cpubind);
			nopolicy();
			attach_sysvshm(optarg, "--shm");
			shmattached = 1;
			break;
		case 'f': /* --file */
			check_cpubind(did_cpubind);
			nopolicy();
			attach_shared(optarg, "--file");
			shmattached = 1;
			break;
		case 'L': /* --length */
			noshm("--length");
			shmlen = memsize(optarg);
			break;
		case 'M': /* --shmmode */
			noshm("--shmmode");
			/* the mode is given in octal */
			shmmode = strtoul(optarg, &end, 8);
			if (end == optarg || *end)
				usage();
			break;
		case 'd': /* --dump */
			if (shmfd < 0)
				complain(
				"Cannot do --dump without shared memory.\n");
			dump_shm();
			do_dump = 1;
			break;
		case 'D': /* --dump-nodes */
			if (shmfd < 0)
				complain(
			"Cannot do --dump-nodes without shared memory.\n");
			dump_shm_nodes();
			do_dump = 1;
			break;
		case 't': /* --strict */
			did_strict = 1;
			numa_set_strict(1);
			break;
		case 'I': /* --shmid */
			shmid = strtoul(optarg, &end, 0);
			if (end == optarg || *end)
				usage();
			break;
		case 'u': /* --huge */
			noshm("--huge");
			shmflags |= SHM_HUGETLB;
			break;
		case 'o': /* --offset */
			noshm("--offset");
			shmoffset = memsize(optarg);
			break;
		case 'T': /* --touch */
			needshm("--touch");
			check_shmbeyond("--touch");
			numa_police_memory(shmptr, shmlen);
			break;
		case 'V': /* --verify */
			needshm("--verify");
			if (set_policy < 0)
				complain("Need a policy first to verify");
			check_shmbeyond("--verify");
			numa_police_memory(shmptr, shmlen);
			if (!mask)
				complain("Need a mask to verify");
			else
				verify_shm(set_policy, mask);
			break;
		case 'a': /* --all */
			check_all_parse(did_node_cpu_parse);
			parse_all = 1;
			break;
		default:
			usage();
		}
	}

	av += optind;
	ac -= optind;

	/* shared memory mode: everything was done while parsing, and a
	   trailing command is an error */
	if (shmfd >= 0) {
		if (*av)
			usage();
		exit(exitcode);
	}

	/* process mode: shm-only options make no sense here */
	if (did_strict)
		fprintf(stderr,
			"numactl: warning. Strict flag for process ignored.\n");
	if (do_dump)
		usage_msg("cannot do --dump|--dump-nodes for process");
	if (shmoption)
		usage_msg("shm related option %s for process", shmoption);
	if (*av == NULL)
		usage();

	execvp(*av, av);
	/* execvp() only returns on failure */
	complain("execution of `%s': %s\n", av[0], strerror(errno));
	return 0; /* not reached */
}