blob: aa172940ff2b5e648487489eee2370a2931da0e4 [file]
/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
*
* This file is part of PRoot.
*
* Copyright (C) 2014 STMicroelectronics
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA.
*/
#include <sched.h> /* CLONE_*, */
#include <sys/types.h> /* pid_t, size_t, */
#include <stdlib.h> /* NULL, */
#include <assert.h> /* assert(3), */
#include <string.h> /* bzero(3), */
#include <stdbool.h> /* bool, true, false, */
#include <sys/queue.h> /* LIST_*, */
#include <talloc.h> /* talloc_*, */
#include <signal.h> /* kill(2), SIGKILL, */
#include <sys/ptrace.h> /* ptrace(2), PTRACE_*, */
#include <errno.h> /* E*, */
#include "tracee/tracee.h"
#include "tracee/reg.h"
#include "path/binding.h"
#include "syscall/sysnum.h"
#include "tracee/event.h"
#include "ptrace/ptrace.h"
#include "ptrace/wait.h"
#include "extension/extension.h"
#include "cli/note.h"
#include "compat.h"
/* Head type and file-wide list of the tracee entries: new_tracee()
 * inserts into this list and remove_tracee() unlinks from it.  */
typedef LIST_HEAD(tracees, tracee) Tracees;
static Tracees tracees;
/**
 * talloc destructor installed on zombie entries: unlink @zombie from
 * the list of zombies it was inserted in.  It always returns 0, that
 * is, the deallocation is never refused.
 */
static int remove_zombie(Tracee *zombie)
{
	LIST_REMOVE(zombie, link);

	return 0;
}
/**
 * Callback for talloc_report_depth_cb(): give @pointer the treatment
 * required by its type before it gets unlinked from
 * @tracee_->life_context.  @tracee_ must be a Tracee (this is
 * enforced); @depth, @max_depth and @is_ref are ignored.
 */
static void clean_life_span_object(const void *pointer, int depth UNUSED,
				int max_depth UNUSED, int is_ref UNUSED, void *tracee_)
{
	Tracee *owner = talloc_get_type_abort(tracee_, Tracee);
	Binding *maybe_binding = talloc_get_type(pointer, Binding);

	/* So far bindings are the only objects that need a special
	 * treatment, anything else is left untouched.  */
	if (maybe_binding == NULL)
		return;

	remove_binding_from_all_lists(owner, maybe_binding);
}
/**
 * talloc destructor installed by new_tracee(): unlink @tracee from
 * the list of tracees, clean the objects linked to its life span,
 * then update its relatives:
 *
 * - its children become orphans (their parent is reset to NULL);
 *
 * - its ptracees are released: they are restarted if they had a
 *   pending event and their ptracee state is wiped;
 *
 * - if @tracee is a ptracee itself, it is either replaced with a
 *   zombie entry that keeps its exit/termination status available
 *   for its ptracer, or detached from its ptracer.  In the latter
 *   case, the ptracer's wait(2) is completed with -ECHILD if it was
 *   waiting and has no ptracee left.
 *
 * This function always returns 0.
 */
static int remove_tracee(Tracee *tracee)
{
	Tracee *relative;
	Tracee *ptracer;
	int event;

	LIST_REMOVE(tracee, link);

	/* Clean objects that are linked to this tracee's life
	 * span.  */
	talloc_report_depth_cb(tracee->life_context, 0, 100, clean_life_span_object, tracee);

	/* This could be optimized by using a dedicated list of
	 * children and ptracees.  */
	LIST_FOREACH(relative, &tracees, link) {
		/* Its children are now orphan.  */
		if (relative->parent == tracee)
			relative->parent = NULL;

		/* Its tracees are now free.  */
		if (relative->as_ptracee.ptracer == tracee) {
			/* Release the pending event, if any.  */
			relative->as_ptracee.ptracer = NULL;

			if (relative->as_ptracee.event4.proot.pending) {
				event = handle_tracee_event(relative,
							relative->as_ptracee.event4.proot.value);
				(void) restart_tracee(relative, event);
			}
			else if (relative->as_ptracee.event4.ptracer.pending) {
				/* NOTE(review): this branch tests
				 * event4.ptracer.pending but reads
				 * event4.proot.value -- confirm this
				 * is intended.  */
				event = relative->as_ptracee.event4.proot.value;
				(void) restart_tracee(relative, event);
			}

			/* memset(3) is used instead of the legacy
			 * bzero(3), which isn't part of ISO C.  */
			memset(&relative->as_ptracee, 0, sizeof(relative->as_ptracee));
		}
	}

	/* Nothing else to do if it's not a ptracee.  */
	ptracer = tracee->as_ptracee.ptracer;
	if (ptracer == NULL)
		return 0;

	/* Zombify this ptracee until its ptracer is notified about
	 * its death.  */
	event = tracee->as_ptracee.event4.ptracer.value;
	if (tracee->as_ptracee.event4.ptracer.pending
	    && (WIFEXITED(event) || WIFSIGNALED(event))) {
		Tracee *zombie;

		zombie = new_dummy_tracee(ptracer);
		if (zombie != NULL) {
			LIST_INSERT_HEAD(&PTRACER.zombies, zombie, link);
			talloc_set_destructor(zombie, remove_zombie);

			/* The zombie takes over the identity of the
			 * dying ptracee.  */
			zombie->parent = tracee->parent;
			zombie->clone = tracee->clone;
			zombie->pid = tracee->pid;

			detach_from_ptracer(tracee);
			attach_to_ptracer(zombie, ptracer);

			/* Keep the death notification pending for
			 * the ptracer.  */
			zombie->as_ptracee.event4.ptracer.pending = true;
			zombie->as_ptracee.event4.ptracer.value = event;
			zombie->as_ptracee.is_zombie = true;

			return 0;
		}
		/* Fallback to the common path.  */
	}

	detach_from_ptracer(tracee);

	/* Wake its ptracer if there's nothing else to wait for.  */
	if (PTRACER.nb_ptracees == 0 && PTRACER.wait_pid != 0) {
		/* Update the return value of ptracer's wait(2).  */
		poke_reg(ptracer, SYSARG_RESULT, -ECHILD);

		/* Don't forget to write its register cache back.  */
		(void) push_regs(ptracer);

		PTRACER.wait_pid = 0;
		(void) restart_tracee(ptracer, 0);
	}

	return 0;
}
/**
 * Create a dummy tracee entry attached to the given talloc @context:
 * it has no pid, no destructor, and it isn't inserted in the list of
 * tracees.  The entry comes with its own memory collector and with
 * an empty file-system name-space and heap.  This function returns
 * the new entry, or NULL if memory ran out (ENOMEM).
 */
Tracee *new_dummy_tracee(TALLOC_CTX *context)
{
	Tracee *dummy = talloc_zero(context, Tracee);

	if (dummy == NULL)
		return NULL;

	/* The memory collector comes first, then the (empty by
	 * default) file-system name-space and heap.  */
	dummy->ctx = talloc_new(dummy);
	if (dummy->ctx != NULL) {
		dummy->fs = talloc_zero(dummy, FileSystemNameSpace);
		dummy->heap = talloc_zero(dummy, Heap);

		if (dummy->fs != NULL && dummy->heap != NULL)
			return dummy;
	}

	/* Out of memory: drop the partially built entry.  */
	TALLOC_FREE(dummy);
	return NULL;
}
/**
 * Allocate a new entry for the tracee @pid, then set its destructor
 * and add it to the list of tracees.  This function returns NULL if
 * an error occurred (ENOMEM), otherwise it returns the newly
 * allocated structure.  On failure nothing is left in the list of
 * tracees.
 */
static Tracee *new_tracee(pid_t pid)
{
	Tracee *tracee;

	tracee = new_dummy_tracee(NULL);
	if (tracee == NULL)
		return NULL;

	/* Allocate the life-span context first: at this point the
	 * entry has no destructor and isn't listed yet, so a failure
	 * is undone with a plain free.  A NULL life context must not
	 * be kept since remove_tracee() hands it over to
	 * talloc_report_depth_cb().  */
	tracee->life_context = talloc_new(tracee);
	if (tracee->life_context == NULL) {
		TALLOC_FREE(tracee);
		return NULL;
	}

	tracee->pid = pid;

	/* From now on, freeing this entry unlinks it from the list
	 * of tracees and updates its relatives.  */
	talloc_set_destructor(tracee, remove_tracee);
	LIST_INSERT_HEAD(&tracees, tracee, link);

	return tracee;
}
/**
 * Look for a ptracee of @ptracer that matches @pid (-1 for any) and
 * the kind of cloned process expected by @wait_options (see wait(2)
 * manual).  The zombies of @ptracer are scanned first and returned
 * as soon as they match; the list of tracees is scanned next.
 *
 * For the entries found in the list of tracees: if @only_stopped is
 * false the first match is returned whatever its state is; otherwise
 * running tracees are skipped and, if @only_with_pevent is true, the
 * match must have a pending event for its ptracer.
 *
 * This function returns NULL if there's no such ptracee.
 */
static Tracee *get_ptracee(const Tracee *ptracer, pid_t pid, bool only_stopped,
			bool only_with_pevent, word_t wait_options)
{
	const bool any_pid = (pid == -1);
	Tracee *ptracee;

	/* Zombies have the priority.  */
	LIST_FOREACH(ptracee, &PTRACER.zombies, link) {
		/* Wrong pid?  */
		if (!any_pid && ptracee->pid != pid)
			continue;

		/* Wrong kind of cloned process?  */
		if (!EXPECTED_WAIT_CLONE(wait_options, ptracee))
			continue;

		return ptracee;
	}

	LIST_FOREACH(ptracee, &tracees, link) {
		/* Traced by someone else, or not traced at all?  */
		if (PTRACEE.ptracer != ptracer)
			continue;

		/* Wrong pid?  */
		if (!any_pid && ptracee->pid != pid)
			continue;

		/* Wrong kind of cloned process?  */
		if (!EXPECTED_WAIT_CLONE(wait_options, ptracee))
			continue;

		/* The caller doesn't care about the stopped state:
		 * it has to be careful since this tracee is maybe
		 * running.  */
		if (!only_stopped)
			return ptracee;

		/* Still running?  */
		if (ptracee->running)
			continue;

		/* Stopped: is a pending event for its ptracer
		 * required?  */
		if (!only_with_pevent || PTRACEE.event4.ptracer.pending)
			return ptracee;

		/* The specific tracee was found but it isn't in the
		 * expected state: searching further is pointless.  */
		if (ptracee->pid == pid)
			break;
	}

	return NULL;
}
/**
 * Wrapper for get_ptracee() with its "only stopped" filter enabled:
 * tracees that are still running are never returned.  See
 * get_ptracee() for the meaning of the parameters and for the
 * returned value.
 */
Tracee *get_stopped_ptracee(const Tracee *ptracer, pid_t pid,
			bool only_with_pevent, word_t wait_options)
{
	const bool only_stopped = true;

	return get_ptracee(ptracer, pid, only_stopped, only_with_pevent, wait_options);
}
/**
 * Wrapper for get_ptracee(), this tells whether @ptracer has at
 * least one ptracee that matches @pid and @wait_options.  Neither
 * the stopped state nor the presence of a pending event is taken
 * into account: a running ptracee does count.
 */
bool has_ptracees(const Tracee *ptracer, pid_t pid, word_t wait_options)
{
	const Tracee *match;

	match = get_ptracee(ptracer, pid, false, false, wait_options);

	return (match != NULL);
}
/**
* Return the entry related to the tracee @pid. If no entry were
* found, a new one is created if @create is true, otherwise NULL is
* returned.
*/
Tracee *get_tracee(const Tracee *current_tracee, pid_t pid, bool create)
{
Tracee *tracee;
/* Don't reset the memory collector if the searched tracee is
* the current one: there's likely pointers to the
* sub-allocated data in the caller. */
if (current_tracee != NULL && current_tracee->pid == pid)
return (Tracee *)current_tracee;
LIST_FOREACH(tracee, &tracees, link) {
if (tracee->pid == pid) {
/* Flush then allocate a new memory collector. */
TALLOC_FREE(tracee->ctx);
tracee->ctx = talloc_new(tracee);
return tracee;
}
}
return (create ? new_tracee(pid) : NULL);
}
/**
 * Walk the list of tracees and free every entry that is marked as
 * terminated.
 */
void free_terminated_tracees()
{
	Tracee *current;
	Tracee *following;

	/* LIST_FOREACH can't be used here since the current item may
	 * be deleted: its successor is saved before that happens.  */
	for (current = tracees.lh_first; current != NULL; current = following) {
		following = current->link.le_next;

		if (current->terminated)
			TALLOC_FREE(current);
	}
}
/**
 * Make new @parent's child inherit from it.  Depending on
 * @clone_flags, some information are copied or shared.  If @parent
 * is currently in clone(2), the flags actually used are the ones
 * passed to this syscall, not @clone_flags.
 *
 * The pid of the child is obtained with PTRACE_GETEVENTMSG, and its
 * entry is looked up -- or created -- in the list of tracees.  The
 * child is restarted if it was already alive but stopped until that
 * moment, unless its ptracer requires it to stay stopped.
 *
 * This function returns -errno if an error occurred (-ESRCH if no
 * child pid was reported, -ENOMEM if memory ran out), otherwise 0.
 */
int new_child(Tracee *parent, word_t clone_flags)
{
	int ptrace_options;
	unsigned long pid = 0;
	Tracee *child;
	int status;

	/* If the tracee calls clone(2) with the CLONE_VFORK flag,
	 * PTRACE_EVENT_VFORK will be delivered instead [...];
	 * otherwise if the tracee calls clone(2) with the exit signal
	 * set to SIGCHLD, PTRACE_EVENT_FORK will be delivered [...]
	 *
	 * -- ptrace(2) man-page
	 *
	 * That means we have to check if it's actually a clone(2) in
	 * order to get the right flags.
	 */
	status = fetch_regs(parent);
	if (status >= 0 && get_sysnum(parent, CURRENT) == PR_clone)
		clone_flags = peek_reg(parent, CURRENT, SYSARG_1);

	/* Get the pid of the parent's new child.  */
	status = ptrace(PTRACE_GETEVENTMSG, parent->pid, NULL, &pid);
	if (status < 0 || pid == 0) {
		/* Compute the error code right now, that is, before
		 * errno gets a chance to be clobbered.  A request
		 * that succeeded without reporting any pid is an
		 * error too: it must not be reported as a success
		 * since no child was set up.  */
		status = (status < 0 ? -errno : -ESRCH);
		note(parent, WARNING, SYSTEM, "ptrace(GETEVENTMSG)");
		return status;
	}

	child = get_tracee(parent, (pid_t) pid, true);
	if (child == NULL) {
		note(parent, WARNING, SYSTEM, "running out of memory");
		return -ENOMEM;
	}

	/* Sanity checks.  */
	assert(child != NULL
	    && child->exe == NULL
	    && child->fs->cwd == NULL
	    && child->fs->bindings.pending == NULL
	    && child->fs->bindings.guest == NULL
	    && child->fs->bindings.host == NULL
	    && child->qemu == NULL
	    && child->glue == NULL
	    && child->parent == NULL
	    && child->as_ptracee.ptracer == NULL);

	child->verbose = parent->verbose;
	child->seccomp = parent->seccomp;
	child->sysexit_pending = parent->sysexit_pending;

	/* If CLONE_VM is set, the calling process and the child
	 * process run in the same memory space [...] any memory
	 * mapping or unmapping performed with mmap(2) or munmap(2) by
	 * the child or calling process also affects the other
	 * process.
	 *
	 * If CLONE_VM is not set, the child process runs in a
	 * separate copy of the memory space of the calling process at
	 * the time of clone().  Memory writes or file
	 * mappings/unmappings performed by one of the processes do
	 * not affect the other, as with fork(2).
	 *
	 * -- clone(2) man-page
	 */
	TALLOC_FREE(child->heap);
	child->heap = ((clone_flags & CLONE_VM) != 0)
		? talloc_reference(child, parent->heap)
		: talloc_memdup(child, parent->heap, sizeof(Heap));
	if (child->heap == NULL)
		return -ENOMEM;

	/* If CLONE_PARENT is set, then the parent of the new child
	 * (as returned by getppid(2)) will be the same as that of the
	 * calling process.
	 *
	 * If CLONE_PARENT is not set, then (as with fork(2)) the
	 * child's parent is the calling process.
	 *
	 * -- clone(2) man-page
	 */
	if ((clone_flags & CLONE_PARENT) != 0)
		child->parent = parent->parent;
	else
		child->parent = parent;

	/* Remember if this child belongs to the same thread group as
	 * its parent.  This is currently useful for ptrace emulation
	 * only but it deserves to be extended to support execve(2)
	 * specificity (ie. when a thread calls execve(2), its pid
	 * gets replaced by the pid of its thread group leader).  */
	child->clone = ((clone_flags & CLONE_THREAD) != 0);

	/* Depending on how the new process is created, it may be
	 * automatically traced by the parent's tracer.  As stated in
	 * the ptrace(2) man-page quoted above, CLONE_VFORK takes
	 * precedence over the exit signal: a clone(2) with both
	 * CLONE_VFORK and SIGCHLD is a vfork, not a fork.  */
	ptrace_options = ( clone_flags == 0			? PTRACE_O_TRACEFORK
			: (clone_flags & CLONE_VFORK) != 0	? PTRACE_O_TRACEVFORK
			: (clone_flags & 0xFF) == SIGCHLD	? PTRACE_O_TRACEFORK
			:					  PTRACE_O_TRACECLONE);

	if (parent->as_ptracee.ptracer != NULL
	    && (   (ptrace_options & parent->as_ptracee.options) != 0
		|| (clone_flags & CLONE_PTRACE) != 0)) {
		attach_to_ptracer(child, parent->as_ptracee.ptracer);

		/* All these flags are inheritable, no matter why this
		 * child is being traced.  */
		child->as_ptracee.options |= (parent->as_ptracee.options
					      & ( PTRACE_O_TRACECLONE
						| PTRACE_O_TRACEEXEC
						| PTRACE_O_TRACEEXIT
						| PTRACE_O_TRACEFORK
						| PTRACE_O_TRACESYSGOOD
						| PTRACE_O_TRACEVFORK
						| PTRACE_O_TRACEVFORKDONE));
	}

	/* If CLONE_FS is set, the parent and the child process share
	 * the same file system information.  This includes the root
	 * of the file system, the current working directory, and the
	 * umask.  Any call to chroot(2), chdir(2), or umask(2)
	 * performed by the parent process or the child process also
	 * affects the other process.
	 *
	 * If CLONE_FS is not set, the child process works on a copy
	 * of the file system information of the parent process at the
	 * time of the clone() call.  Calls to chroot(2), chdir(2),
	 * umask(2) performed later by one of the processes do not
	 * affect the other process.
	 *
	 * -- clone(2) man-page
	 */
	TALLOC_FREE(child->fs);
	if ((clone_flags & CLONE_FS) != 0) {
		/* File-system name-space is shared.  */
		child->fs = talloc_reference(child, parent->fs);

		/* The child can't be left without any file-system
		 * name-space, the same way it can't be left without
		 * any heap.  */
		if (child->fs == NULL)
			return -ENOMEM;
	}
	else {
		/* File-system name-space is copied.  */
		child->fs = talloc_zero(child, FileSystemNameSpace);
		if (child->fs == NULL)
			return -ENOMEM;

		child->fs->cwd = talloc_strdup(child->fs, parent->fs->cwd);
		if (child->fs->cwd == NULL)
			return -ENOMEM;
		talloc_set_name_const(child->fs->cwd, "$cwd");

		/* Bindings are shared across file-system name-spaces since a
		 * "mount --bind" made by a process affects all other processes
		 * under Linux.  Actually they are copied when a sub
		 * reconfiguration occurred (nested proot or chroot(2)).  */
		child->fs->bindings.guest = talloc_reference(child->fs, parent->fs->bindings.guest);
		child->fs->bindings.host = talloc_reference(child->fs, parent->fs->bindings.host);
	}

	/* The path to the executable is unshared only once the child
	 * process does a call to execve(2).  */
	child->exe = talloc_reference(child, parent->exe);

	child->qemu = talloc_reference(child, parent->qemu);
	child->glue = talloc_reference(child, parent->glue);

	child->host_ldso_paths = talloc_reference(child, parent->host_ldso_paths);
	child->guest_ldso_paths = talloc_reference(child, parent->guest_ldso_paths);

	child->tool_name = parent->tool_name;

	inherit_extensions(child, parent, clone_flags);

	/* Restart the child tracee if it was already alive but
	 * stopped until that moment.  */
	if (child->sigstop == SIGSTOP_PENDING) {
		bool keep_stopped = false;

		child->sigstop = SIGSTOP_ALLOWED;

		/* Notify its ptracer if it is ready to be traced.  */
		if (child->as_ptracee.ptracer != NULL) {
			/* Sanity check.  */
			assert(!child->as_ptracee.tracing_started);

			keep_stopped = handle_ptracee_event(child, __W_STOPCODE(SIGSTOP));

			/* Note that this event was already handled by
			 * PRoot since child->as_ptracee.ptracer was
			 * NULL up to now.  */
			child->as_ptracee.event4.proot.pending = false;
			child->as_ptracee.event4.proot.value = 0;
		}

		if (!keep_stopped)
			(void) restart_tracee(child, 0);
	}

	return 0;
}
/**
 * Helper for swap_config(): copy the verbose level of @old_parent
 * into @new_parent, then move the talloc parentality of -- and copy
 * the pointers to -- the "fs", "exe", "qemu", "glue" and
 * "extensions" fields from @old_parent to @new_parent.  The fields
 * of @old_parent are left as-is.
 */
static void reparent_config(Tracee *new_parent, Tracee *old_parent)
{
	new_parent->verbose = old_parent->verbose;

	/* No semicolon after "while (0)": it is supplied where the
	 * macro is used, this way each use expands to exactly one
	 * statement.  */
#define REPARENT(field) do {							\
		talloc_reparent(old_parent, new_parent, old_parent->field);	\
		new_parent->field = old_parent->field;				\
	} while (0)

	REPARENT(fs);
	REPARENT(exe);
	REPARENT(qemu);
	REPARENT(glue);
	REPARENT(extensions);

#undef REPARENT
}
/**
 * Swap configuration (pointers and parentality) between @tracee1 and
 * @tracee2, see reparent_config() for the fields that are concerned.
 * A scratch entry attached to @tracee1->ctx is used as intermediate
 * storage.  This function returns -ENOMEM if this scratch entry
 * can't be allocated (nothing is swapped then), otherwise 0.
 */
int swap_config(Tracee *tracee1, Tracee *tracee2)
{
	Tracee *scratch = talloc_zero(tracee1->ctx, Tracee);

	if (scratch == NULL)
		return -ENOMEM;

	/* Three-way rotation: tracee1 -> scratch, tracee2 -> tracee1,
	 * then scratch -> tracee2.  */
	reparent_config(scratch, tracee1);
	reparent_config(tracee1, tracee2);
	reparent_config(tracee2, scratch);

	return 0;
}
/**
 * Send the KILL signal to all the tracees that have a valid pid.
 * Errors reported by kill(2) are deliberately ignored.
 */
void kill_all_tracees()
{
	Tracee *tracee;

	LIST_FOREACH(tracee, &tracees, link) {
		/* kill(2) gives a special meaning to non-positive
		 * pids (0: every process in the caller's process
		 * group, -1: every process the caller is allowed to
		 * signal, ...), so an entry without a real pid must
		 * never be passed to it.  */
		if (tracee->pid <= 0)
			continue;

		(void) kill(tracee->pid, SIGKILL);
	}
}