| /* Copyright (C) 2003,2004,2005 Andi Kleen, SuSE Labs. |
| Command line NUMA policy control. |
| |
| numactl is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public |
| License as published by the Free Software Foundation; version |
| 2. |
| |
| numactl is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should find a copy of v2 of the GNU General Public License somewhere |
| on your Linux system; if not, write to the Free Software Foundation, |
| Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ |
| #define _GNU_SOURCE |
| #include <getopt.h> |
| #include <errno.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <stdarg.h> |
| #include <ctype.h> |
| #include "numa.h" |
| #include "numaif.h" |
| #include "numaint.h" |
| #include "util.h" |
| #include "shm.h" |
| |
| #define CPUSET 0 |
| #define ALL 1 |
| |
| int exitcode; |
| |
| struct option opts[] = { |
| {"all", 0, 0, 'a'}, |
| {"interleave", 1, 0, 'i' }, |
| {"preferred", 1, 0, 'p' }, |
| {"cpubind", 1, 0, 'c' }, |
| {"cpunodebind", 1, 0, 'N' }, |
| {"physcpubind", 1, 0, 'C' }, |
| {"membind", 1, 0, 'm'}, |
| {"show", 0, 0, 's' }, |
| {"localalloc", 0,0, 'l'}, |
| {"hardware", 0,0,'H' }, |
| |
| {"shm", 1, 0, 'S'}, |
| {"file", 1, 0, 'f'}, |
| {"offset", 1, 0, 'o'}, |
| {"length", 1, 0, 'L'}, |
| {"strict", 0, 0, 't'}, |
| {"shmmode", 1, 0, 'M'}, |
| {"dump", 0, 0, 'd'}, |
| {"dump-nodes", 0, 0, 'D'}, |
| {"shmid", 1, 0, 'I'}, |
| {"huge", 0, 0, 'u'}, |
| {"touch", 0, 0, 'T'}, |
| {"verify", 0, 0, 'V'}, /* undocumented - for debugging */ |
| { 0 } |
| }; |
| |
| void usage(void) |
| { |
| fprintf(stderr, |
| "usage: numactl [--all | -a] [--interleave= | -i <nodes>] [--preferred= | -p <node>]\n" |
| " [--physcpubind= | -C <cpus>] [--cpunodebind= | -N <nodes>]\n" |
| " [--membind= | -m <nodes>] [--localalloc | -l] command args ...\n" |
| " numactl [--show | -s]\n" |
| " numactl [--hardware | -H]\n" |
| " numactl [--length | -l <length>] [--offset | -o <offset>] [--shmmode | -M <shmmode>]\n" |
| " [--strict | -t]\n" |
| " [--shmid | -I <id>] --shm | -S <shmkeyfile>\n" |
| " [--shmid | -I <id>] --file | -f <tmpfsfile>\n" |
| " [--huge | -u] [--touch | -T] \n" |
| " memory policy | --dump | -d | --dump-nodes | -D\n" |
| "\n" |
| "memory policy is --interleave | -i, --preferred | -p, --membind | -m, --localalloc | -l\n" |
| "<nodes> is a comma delimited list of node numbers or A-B ranges or all.\n" |
| "Instead of a number a node can also be:\n" |
| " netdev:DEV the node connected to network device DEV\n" |
| " file:PATH the node the block device of path is connected to\n" |
| " ip:HOST the node of the network device host routes through\n" |
| " block:PATH the node of block device path\n" |
| " pci:[seg:]bus:dev[:func] The node of a PCI device\n" |
| "<cpus> is a comma delimited list of cpu numbers or A-B ranges or all\n" |
| "all ranges can be inverted with !\n" |
| "all numbers and ranges can be made cpuset-relative with +\n" |
| "the old --cpubind argument is deprecated.\n" |
| "use --cpunodebind or --physcpubind instead\n" |
| "<length> can have g (GB), m (MB) or k (KB) suffixes\n"); |
| exit(1); |
| } |
| |
| void usage_msg(char *msg, ...) |
| { |
| va_list ap; |
| va_start(ap,msg); |
| fprintf(stderr, "numactl: "); |
| vfprintf(stderr, msg, ap); |
| putchar('\n'); |
| usage(); |
| } |
| |
| void show_physcpubind(void) |
| { |
| int ncpus = numa_num_configured_cpus(); |
| |
| for (;;) { |
| struct bitmask *cpubuf; |
| |
| cpubuf = numa_bitmask_alloc(ncpus); |
| |
| if (numa_sched_getaffinity(0, cpubuf) < 0) { |
| if (errno == EINVAL && ncpus < 1024*1024) { |
| ncpus *= 2; |
| continue; |
| } |
| err("sched_get_affinity"); |
| } |
| printmask("physcpubind", cpubuf); |
| break; |
| } |
| } |
| |
| void show(void) |
| { |
| unsigned long prefnode; |
| struct bitmask *membind, *interleave, *cpubind; |
| unsigned long cur; |
| int policy; |
| |
| if (numa_available() < 0) { |
| show_physcpubind(); |
| printf("No NUMA support available on this system.\n"); |
| exit(1); |
| } |
| |
| cpubind = numa_get_run_node_mask(); |
| |
| prefnode = numa_preferred(); |
| interleave = numa_get_interleave_mask(); |
| membind = numa_get_membind(); |
| cur = numa_get_interleave_node(); |
| |
| policy = 0; |
| if (get_mempolicy(&policy, NULL, 0, 0, 0) < 0) |
| perror("get_mempolicy"); |
| |
| printf("policy: %s\n", policy_name(policy)); |
| |
| printf("preferred node: "); |
| switch (policy) { |
| case MPOL_PREFERRED: |
| if (prefnode != -1) { |
| printf("%ld\n", prefnode); |
| break; |
| } |
| /*FALL THROUGH*/ |
| case MPOL_DEFAULT: |
| printf("current\n"); |
| break; |
| case MPOL_INTERLEAVE: |
| printf("%ld (interleave next)\n",cur); |
| break; |
| case MPOL_BIND: |
| printf("%d\n", find_first(membind)); |
| break; |
| } |
| if (policy == MPOL_INTERLEAVE) { |
| printmask("interleavemask", interleave); |
| printf("interleavenode: %ld\n", cur); |
| } |
| show_physcpubind(); |
| printmask("cpubind", cpubind); // for compatibility |
| printmask("nodebind", cpubind); |
| printmask("membind", membind); |
| } |
| |
| char *fmt_mem(unsigned long long mem, char *buf) |
| { |
| if (mem == -1L) |
| sprintf(buf, "<not available>"); |
| else |
| sprintf(buf, "%llu MB", mem >> 20); |
| return buf; |
| } |
| |
| static void print_distances(int maxnode) |
| { |
| int i,k; |
| int fst = 0; |
| |
| for (i = 0; i <= maxnode; i++) |
| if (numa_bitmask_isbitset(numa_nodes_ptr, i)) { |
| fst = i; |
| break; |
| } |
| if (numa_distance(maxnode,fst) == 0) { |
| printf("No distance information available.\n"); |
| return; |
| } |
| printf("node distances:\n"); |
| printf("node "); |
| for (i = 0; i <= maxnode; i++) |
| if (numa_bitmask_isbitset(numa_nodes_ptr, i)) |
| printf("% 3d ", i); |
| printf("\n"); |
| for (i = 0; i <= maxnode; i++) { |
| if (!numa_bitmask_isbitset(numa_nodes_ptr, i)) |
| continue; |
| printf("% 3d: ", i); |
| for (k = 0; k <= maxnode; k++) |
| if (numa_bitmask_isbitset(numa_nodes_ptr, i) && |
| numa_bitmask_isbitset(numa_nodes_ptr, k)) |
| printf("% 3d ", numa_distance(i,k)); |
| printf("\n"); |
| } |
| } |
| |
| void print_node_cpus(int node) |
| { |
| int i, err; |
| struct bitmask *cpus; |
| |
| cpus = numa_allocate_cpumask(); |
| err = numa_node_to_cpus(node, cpus); |
| if (err >= 0) { |
| for (i = 0; i < cpus->size; i++) |
| if (numa_bitmask_isbitset(cpus, i)) |
| printf(" %d", i); |
| } |
| putchar('\n'); |
| } |
| |
| void hardware(void) |
| { |
| int i; |
| int numnodes=0; |
| int prevnode=-1; |
| int skip=0; |
| int maxnode = numa_max_node(); |
| |
| if (numa_available() < 0) { |
| printf("No NUMA available on this system\n"); |
| exit(1); |
| } |
| |
| for (i=0; i<=maxnode; i++) |
| if (numa_bitmask_isbitset(numa_nodes_ptr, i)) |
| numnodes++; |
| printf("available: %d nodes (", numnodes); |
| for (i=0; i<=maxnode; i++) { |
| if (numa_bitmask_isbitset(numa_nodes_ptr, i)) { |
| if (prevnode == -1) { |
| printf("%d", i); |
| prevnode=i; |
| continue; |
| } |
| |
| if (i > prevnode + 1) { |
| if (skip) { |
| printf("%d", prevnode); |
| skip=0; |
| } |
| printf(",%d", i); |
| prevnode=i; |
| continue; |
| } |
| |
| if (i == prevnode + 1) { |
| if (!skip) { |
| printf("-"); |
| skip=1; |
| } |
| prevnode=i; |
| } |
| |
| if ((i == maxnode) && skip) |
| printf("%d", prevnode); |
| } |
| } |
| printf(")\n"); |
| |
| for (i = 0; i <= maxnode; i++) { |
| char buf[64]; |
| long long fr; |
| unsigned long long sz = numa_node_size64(i, &fr); |
| if (!numa_bitmask_isbitset(numa_nodes_ptr, i)) |
| continue; |
| |
| printf("node %d cpus:", i); |
| print_node_cpus(i); |
| printf("node %d size: %s\n", i, fmt_mem(sz, buf)); |
| printf("node %d free: %s\n", i, fmt_mem(fr, buf)); |
| } |
| print_distances(maxnode); |
| } |
| |
| void checkerror(char *s) |
| { |
| if (errno) { |
| perror(s); |
| exit(1); |
| } |
| } |
| |
| void checknuma(void) |
| { |
| static int numa = -1; |
| if (numa < 0) { |
| if (numa_available() < 0) |
| complain("This system does not support NUMA policy"); |
| } |
| numa = 0; |
| } |
| |
| int set_policy = -1; |
| |
| void setpolicy(int pol) |
| { |
| if (set_policy != -1) |
| usage_msg("Conflicting policies"); |
| set_policy = pol; |
| } |
| |
| void nopolicy(void) |
| { |
| if (set_policy >= 0) |
| usage_msg("specify policy after --shm/--file"); |
| } |
| |
| int did_cpubind = 0; |
| int did_strict = 0; |
| int do_shm = 0; |
| int do_dump = 0; |
| int shmattached = 0; |
| int did_node_cpu_parse = 0; |
| int parse_all = 0; |
| char *shmoption; |
| |
| void check_cpubind(int flag) |
| { |
| if (flag) |
| usage_msg("cannot do --cpubind on shared memory\n"); |
| } |
| |
| void noshm(char *opt) |
| { |
| if (shmattached) |
| usage_msg("%s must be before shared memory specification", opt); |
| shmoption = opt; |
| } |
| |
| void dontshm(char *opt) |
| { |
| if (shmoption) |
| usage_msg("%s shm option is not allowed before %s", shmoption, opt); |
| } |
| |
| void needshm(char *opt) |
| { |
| if (!shmattached) |
| usage_msg("%s must be after shared memory specification", opt); |
| } |
| |
| void check_all_parse(int flag) |
| { |
| if (did_node_cpu_parse) |
| usage_msg("--all/-a option must be before all cpu/node specifications"); |
| } |
| |
| void get_short_opts(struct option *o, char *s) |
| { |
| *s++ = '+'; |
| while (o->name) { |
| if (isprint(o->val)) { |
| *s++ = o->val; |
| if (o->has_arg) |
| *s++ = ':'; |
| } |
| o++; |
| } |
| *s = '\0'; |
| } |
| |
| void check_shmbeyond(char *msg) |
| { |
| if (shmoffset >= shmlen) { |
| fprintf(stderr, |
| "numactl: region offset %#llx beyond its length %#llx at %s\n", |
| shmoffset, shmlen, msg); |
| exit(1); |
| } |
| } |
| |
| static struct bitmask *numactl_parse_nodestring(char *s, int flag) |
| { |
| static char *last; |
| |
| if (s[0] == 's' && !strcmp(s, "same")) { |
| if (!last) |
| usage_msg("same needs previous node specification"); |
| s = last; |
| } else { |
| last = s; |
| } |
| |
| if (flag == ALL) |
| return numa_parse_nodestring_all(s); |
| else |
| return numa_parse_nodestring(s); |
| } |
| |
| int main(int ac, char **av) |
| { |
| int c, i, nnodes=0; |
| long node=-1; |
| char *end; |
| char shortopts[array_len(opts)*2 + 1]; |
| struct bitmask *mask = NULL; |
| |
| get_short_opts(opts,shortopts); |
| while ((c = getopt_long(ac, av, shortopts, opts, NULL)) != -1) { |
| switch (c) { |
| case 's': /* --show */ |
| show(); |
| exit(0); |
| case 'H': /* --hardware */ |
| nopolicy(); |
| hardware(); |
| exit(0); |
| case 'i': /* --interleave */ |
| checknuma(); |
| if (parse_all) |
| mask = numactl_parse_nodestring(optarg, ALL); |
| else |
| mask = numactl_parse_nodestring(optarg, CPUSET); |
| if (!mask) { |
| printf ("<%s> is invalid\n", optarg); |
| usage(); |
| } |
| |
| errno = 0; |
| did_node_cpu_parse = 1; |
| setpolicy(MPOL_INTERLEAVE); |
| if (shmfd >= 0) |
| numa_interleave_memory(shmptr, shmlen, mask); |
| else |
| numa_set_interleave_mask(mask); |
| checkerror("setting interleave mask"); |
| break; |
| case 'N': /* --cpunodebind */ |
| case 'c': /* --cpubind */ |
| dontshm("-c/--cpubind/--cpunodebind"); |
| checknuma(); |
| if (parse_all) |
| mask = numactl_parse_nodestring(optarg, ALL); |
| else |
| mask = numactl_parse_nodestring(optarg, CPUSET); |
| if (!mask) { |
| printf ("<%s> is invalid\n", optarg); |
| usage(); |
| } |
| errno = 0; |
| check_cpubind(do_shm); |
| did_cpubind = 1; |
| did_node_cpu_parse = 1; |
| numa_run_on_node_mask_all(mask); |
| checkerror("sched_setaffinity"); |
| break; |
| case 'C': /* --physcpubind */ |
| { |
| struct bitmask *cpubuf; |
| dontshm("-C/--physcpubind"); |
| if (parse_all) |
| cpubuf = numa_parse_cpustring_all(optarg); |
| else |
| cpubuf = numa_parse_cpustring(optarg); |
| if (!cpubuf) { |
| printf ("<%s> is invalid\n", optarg); |
| usage(); |
| } |
| errno = 0; |
| check_cpubind(do_shm); |
| did_cpubind = 1; |
| did_node_cpu_parse = 1; |
| numa_sched_setaffinity(0, cpubuf); |
| checkerror("sched_setaffinity"); |
| free(cpubuf); |
| break; |
| } |
| case 'm': /* --membind */ |
| checknuma(); |
| setpolicy(MPOL_BIND); |
| if (parse_all) |
| mask = numactl_parse_nodestring(optarg, ALL); |
| else |
| mask = numactl_parse_nodestring(optarg, CPUSET); |
| if (!mask) { |
| printf ("<%s> is invalid\n", optarg); |
| usage(); |
| } |
| errno = 0; |
| did_node_cpu_parse = 1; |
| numa_set_bind_policy(1); |
| if (shmfd >= 0) { |
| numa_tonodemask_memory(shmptr, shmlen, mask); |
| } else { |
| numa_set_membind(mask); |
| } |
| numa_set_bind_policy(0); |
| checkerror("setting membind"); |
| break; |
| case 'p': /* --preferred */ |
| checknuma(); |
| setpolicy(MPOL_PREFERRED); |
| if (parse_all) |
| mask = numactl_parse_nodestring(optarg, ALL); |
| else |
| mask = numactl_parse_nodestring(optarg, CPUSET); |
| if (!mask) { |
| printf ("<%s> is invalid\n", optarg); |
| usage(); |
| } |
| for (i=0; i<mask->size; i++) { |
| if (numa_bitmask_isbitset(mask, i)) { |
| node = i; |
| nnodes++; |
| } |
| } |
| if (nnodes != 1) |
| usage(); |
| numa_bitmask_free(mask); |
| errno = 0; |
| did_node_cpu_parse = 1; |
| numa_set_bind_policy(0); |
| if (shmfd >= 0) |
| numa_tonode_memory(shmptr, shmlen, node); |
| else |
| numa_set_preferred(node); |
| checkerror("setting preferred node"); |
| break; |
| case 'l': /* --local */ |
| checknuma(); |
| setpolicy(MPOL_DEFAULT); |
| errno = 0; |
| if (shmfd >= 0) |
| numa_setlocal_memory(shmptr, shmlen); |
| else |
| numa_set_localalloc(); |
| checkerror("local allocation"); |
| break; |
| case 'S': /* --shm */ |
| check_cpubind(did_cpubind); |
| nopolicy(); |
| attach_sysvshm(optarg, "--shm"); |
| shmattached = 1; |
| break; |
| case 'f': /* --file */ |
| check_cpubind(did_cpubind); |
| nopolicy(); |
| attach_shared(optarg, "--file"); |
| shmattached = 1; |
| break; |
| case 'L': /* --length */ |
| noshm("--length"); |
| shmlen = memsize(optarg); |
| break; |
| case 'M': /* --shmmode */ |
| noshm("--shmmode"); |
| shmmode = strtoul(optarg, &end, 8); |
| if (end == optarg || *end) |
| usage(); |
| break; |
| case 'd': /* --dump */ |
| if (shmfd < 0) |
| complain( |
| "Cannot do --dump without shared memory.\n"); |
| dump_shm(); |
| do_dump = 1; |
| break; |
| case 'D': /* --dump-nodes */ |
| if (shmfd < 0) |
| complain( |
| "Cannot do --dump-nodes without shared memory.\n"); |
| dump_shm_nodes(); |
| do_dump = 1; |
| break; |
| case 't': /* --strict */ |
| did_strict = 1; |
| numa_set_strict(1); |
| break; |
| case 'I': /* --shmid */ |
| shmid = strtoul(optarg, &end, 0); |
| if (end == optarg || *end) |
| usage(); |
| break; |
| |
| case 'u': /* --huge */ |
| noshm("--huge"); |
| shmflags |= SHM_HUGETLB; |
| break; |
| |
| case 'o': /* --offset */ |
| noshm("--offset"); |
| shmoffset = memsize(optarg); |
| break; |
| |
| case 'T': /* --touch */ |
| needshm("--touch"); |
| check_shmbeyond("--touch"); |
| numa_police_memory(shmptr, shmlen); |
| break; |
| |
| case 'V': /* --verify */ |
| needshm("--verify"); |
| if (set_policy < 0) |
| complain("Need a policy first to verify"); |
| check_shmbeyond("--verify"); |
| numa_police_memory(shmptr, shmlen); |
| if (!mask) |
| complain("Need a mask to verify"); |
| else |
| verify_shm(set_policy, mask); |
| break; |
| |
| case 'a': /* --all */ |
| check_all_parse(did_node_cpu_parse); |
| parse_all = 1; |
| break; |
| default: |
| usage(); |
| } |
| } |
| |
| av += optind; |
| ac -= optind; |
| |
| if (shmfd >= 0) { |
| if (*av) |
| usage(); |
| exit(exitcode); |
| } |
| |
| if (did_strict) |
| fprintf(stderr, |
| "numactl: warning. Strict flag for process ignored.\n"); |
| |
| if (do_dump) |
| usage_msg("cannot do --dump|--dump-shm for process"); |
| |
| if (shmoption) |
| usage_msg("shm related option %s for process", shmoption); |
| |
| if (*av == NULL) |
| usage(); |
| execvp(*av, av); |
| complain("execution of `%s': %s\n", av[0], strerror(errno)); |
| return 0; /* not reached */ |
| } |