| /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
| |
| #include <fcntl.h> |
| #include <sys/ioctl.h> |
| #include <sys/mount.h> |
| |
| #include "errno-util.h" |
| #include "fd-util.h" |
| #include "fileio.h" |
| #include "missing_fs.h" |
| #include "missing_magic.h" |
| #include "missing_sched.h" |
| #include "namespace-util.h" |
| #include "process-util.h" |
| #include "stat-util.h" |
| #include "stdio-util.h" |
| #include "user-util.h" |
| |
| const struct namespace_info namespace_info[] = { |
| [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, }, |
| [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, }, |
| [NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, }, |
| /* So, the mount namespace flag is called CLONE_NEWNS for historical |
| * reasons. Let's expose it here under a more explanatory name: "mnt". |
| * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ |
| [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, }, |
| [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, }, |
| [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, }, |
| [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, }, |
| [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, }, |
| { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ }, |
| }; |
| |
/* Expands to the procfs path of the namespace of the given type for process 'pid', by looking up the
 * type's proc_path in namespace_info[] and handing it to procfs_file_alloca(). As the name of that
 * helper suggests, the result is presumably alloca()-backed, i.e. only valid within the calling
 * function. */
#define pid_namespace_path(pid, type)                                           \
        procfs_file_alloca((pid), namespace_info[(type)].proc_path)
| |
| int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { |
| _cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, netnsfd = -EBADF, usernsfd = -EBADF; |
| int rfd = -EBADF; |
| |
| assert(pid >= 0); |
| |
| if (mntns_fd) { |
| const char *mntns; |
| |
| mntns = pid_namespace_path(pid, NAMESPACE_MOUNT); |
| mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); |
| if (mntnsfd < 0) |
| return -errno; |
| } |
| |
| if (pidns_fd) { |
| const char *pidns; |
| |
| pidns = pid_namespace_path(pid, NAMESPACE_PID); |
| pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); |
| if (pidnsfd < 0) |
| return -errno; |
| } |
| |
| if (netns_fd) { |
| const char *netns; |
| |
| netns = pid_namespace_path(pid, NAMESPACE_NET); |
| netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); |
| if (netnsfd < 0) |
| return -errno; |
| } |
| |
| if (userns_fd) { |
| const char *userns; |
| |
| userns = pid_namespace_path(pid, NAMESPACE_USER); |
| usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); |
| if (usernsfd < 0 && errno != ENOENT) |
| return -errno; |
| } |
| |
| if (root_fd) { |
| const char *root; |
| |
| root = procfs_file_alloca(pid, "root"); |
| rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); |
| if (rfd < 0) |
| return -errno; |
| } |
| |
| if (pidns_fd) |
| *pidns_fd = TAKE_FD(pidnsfd); |
| |
| if (mntns_fd) |
| *mntns_fd = TAKE_FD(mntnsfd); |
| |
| if (netns_fd) |
| *netns_fd = TAKE_FD(netnsfd); |
| |
| if (userns_fd) |
| *userns_fd = TAKE_FD(usernsfd); |
| |
| if (root_fd) |
| *root_fd = TAKE_FD(rfd); |
| |
| return 0; |
| } |
| |
| int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { |
| int r; |
| |
| if (userns_fd >= 0) { |
| /* Can't setns to your own userns, since then you could escalate from non-root to root in |
| * your own namespace, so check if namespaces are equal before attempting to enter. */ |
| |
| r = files_same(FORMAT_PROC_FD_PATH(userns_fd), "/proc/self/ns/user", 0); |
| if (r < 0) |
| return r; |
| if (r) |
| userns_fd = -EBADF; |
| } |
| |
| if (pidns_fd >= 0) |
| if (setns(pidns_fd, CLONE_NEWPID) < 0) |
| return -errno; |
| |
| if (mntns_fd >= 0) |
| if (setns(mntns_fd, CLONE_NEWNS) < 0) |
| return -errno; |
| |
| if (netns_fd >= 0) |
| if (setns(netns_fd, CLONE_NEWNET) < 0) |
| return -errno; |
| |
| if (userns_fd >= 0) |
| if (setns(userns_fd, CLONE_NEWUSER) < 0) |
| return -errno; |
| |
| if (root_fd >= 0) { |
| if (fchdir(root_fd) < 0) |
| return -errno; |
| |
| if (chroot(".") < 0) |
| return -errno; |
| } |
| |
| return reset_uid_gid(); |
| } |
| |
| int fd_is_ns(int fd, unsigned long nsflag) { |
| struct statfs s; |
| int r; |
| |
| /* Checks whether the specified file descriptor refers to a namespace created by specifying nsflag in clone(). |
| * On old kernels there's no nice way to detect that, hence on those we'll return a recognizable error (EUCLEAN), |
| * so that callers can handle this somewhat nicely. |
| * |
| * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not |
| * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */ |
| |
| if (fstatfs(fd, &s) < 0) |
| return -errno; |
| |
| if (!is_fs_type(&s, NSFS_MAGIC)) { |
| /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs |
| * instead. Handle that in a somewhat smart way. */ |
| |
| if (is_fs_type(&s, PROC_SUPER_MAGIC)) { |
| struct statfs t; |
| |
| /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the |
| * passed fd might refer to a network namespace, but we can't know for sure. In that case, |
| * return a recognizable error. */ |
| |
| if (statfs("/proc/self/ns/net", &t) < 0) |
| return -errno; |
| |
| if (s.f_type == t.f_type) |
| return -EUCLEAN; /* It's possible, we simply don't know */ |
| } |
| |
| return 0; /* No! */ |
| } |
| |
| r = ioctl(fd, NS_GET_NSTYPE); |
| if (r < 0) { |
| if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */ |
| return -EUCLEAN; |
| |
| return -errno; |
| } |
| |
| return (unsigned long) r == nsflag; |
| } |
| |
| int detach_mount_namespace(void) { |
| |
| /* Detaches the mount namespace, disabling propagation from our namespace to the host */ |
| |
| if (unshare(CLONE_NEWNS) < 0) |
| return -errno; |
| |
| return RET_NERRNO(mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)); |
| } |
| |
| int userns_acquire(const char *uid_map, const char *gid_map) { |
| char path[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1]; |
| _cleanup_(sigkill_waitp) pid_t pid = 0; |
| _cleanup_close_ int userns_fd = -EBADF; |
| int r; |
| |
| assert(uid_map); |
| assert(gid_map); |
| |
| /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it, |
| * and then kills the process again. This way we have a userns fd that is not bound to any |
| * process. We can use that for file system mounts and similar. */ |
| |
| r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NEW_USERNS, &pid); |
| if (r < 0) |
| return r; |
| if (r == 0) |
| /* Child. We do nothing here, just freeze until somebody kills us. */ |
| freeze(); |
| |
| xsprintf(path, "/proc/" PID_FMT "/uid_map", pid); |
| r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER); |
| if (r < 0) |
| return log_error_errno(r, "Failed to write UID map: %m"); |
| |
| xsprintf(path, "/proc/" PID_FMT "/gid_map", pid); |
| r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER); |
| if (r < 0) |
| return log_error_errno(r, "Failed to write GID map: %m"); |
| |
| r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL); |
| if (r < 0) |
| return log_error_errno(r, "Failed to open userns fd: %m"); |
| |
| return TAKE_FD(userns_fd); |
| |
| } |
| |
| int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) { |
| const char *ns_path; |
| struct stat ns_st1, ns_st2; |
| |
| if (pid1 == 0) |
| pid1 = getpid_cached(); |
| |
| if (pid2 == 0) |
| pid2 = getpid_cached(); |
| |
| if (pid1 == pid2) |
| return 1; |
| |
| ns_path = pid_namespace_path(pid1, type); |
| if (stat(ns_path, &ns_st1) < 0) |
| return -errno; |
| |
| ns_path = pid_namespace_path(pid2, type); |
| if (stat(ns_path, &ns_st2) < 0) |
| return -errno; |
| |
| return stat_inode_same(&ns_st1, &ns_st2); |
| } |