| /* Copyright (C) 2002-2014 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Lesser General Public |
| License as published by the Free Software Foundation; either |
| version 2.1 of the License, or (at your option) any later version. |
| |
| The GNU C Library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public |
| License along with the GNU C Library; if not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include <assert.h> |
| #include <errno.h> |
| #include <signal.h> |
| #include <stdint.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <sys/mman.h> |
| #include <sys/param.h> |
| #include <dl-sysdep.h> |
| #include <dl-tls.h> |
| #include <tls.h> |
| #include <list.h> |
| #include <lowlevellock.h> |
| #include <kernel-features.h> |
| |
| |
| #ifndef NEED_SEPARATE_REGISTER_STACK |
| |
| /* Most architectures have exactly one stack pointer. Some have more. */ |
| # define STACK_VARIABLES void *stackaddr = NULL |
| |
| /* How to pass the values to the 'create_thread' function. */ |
| # define STACK_VARIABLES_ARGS stackaddr |
| |
| /* How to declare a function which gets these parameters. */ |
| # define STACK_VARIABLES_PARMS void *stackaddr |
| |
| /* How to declare allocate_stack. */ |
| # define ALLOCATE_STACK_PARMS void **stack |
| |
| /* This is how the function is called. We do it this way to allow |
| other variants of the function to have more parameters. */ |
| # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr) |
| |
| #else |
| |
| /* We need two stacks. The kernel will place them but we have to tell |
| the kernel about the size of the reserved address space. */ |
| # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0 |
| |
| /* How to pass the values to the 'create_thread' function. */ |
| # define STACK_VARIABLES_ARGS stackaddr, stacksize |
| |
| /* How to declare a function which gets these parameters. */ |
| # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize |
| |
| /* How to declare allocate_stack. */ |
| # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize |
| |
| /* This is how the function is called. We do it this way to allow |
| other variants of the function to have more parameters. */ |
| # define ALLOCATE_STACK(attr, pd) \ |
| allocate_stack (attr, pd, &stackaddr, &stacksize) |
| |
| #endif |
| |
| |
| /* Default alignment of stack. */ |
| #ifndef STACK_ALIGN |
| # define STACK_ALIGN __alignof__ (long double) |
| #endif |
| |
| /* Default value for minimal stack size after allocating thread |
| descriptor and guard. */ |
| #ifndef MINIMAL_REST_STACK |
| # define MINIMAL_REST_STACK 4096 |
| #endif |
| |
| |
| /* Newer kernels have the MAP_STACK flag to indicate a mapping is used for |
| a stack. Use it when possible. */ |
| #ifndef MAP_STACK |
| # define MAP_STACK 0 |
| #endif |
| |
| /* This yields the pointer that the TLS support code treats as the thread pointer. */ |
| #if TLS_TCB_AT_TP |
| # define TLS_TPADJ(pd) (pd) |
| #elif TLS_DTV_AT_TP |
| # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE)) |
| #endif |
| |
| /* Cache handling for not-yet free stacks. */ |
| |
| /* Maximum size of the cache, in bytes. */ |
| static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default. */ |
| static size_t stack_cache_actsize; |
| |
| /* Mutex protecting this variable. */ |
| static int stack_cache_lock = LLL_LOCK_INITIALIZER; |
| |
| /* List of stacks queued for reuse. */ |
| static LIST_HEAD (stack_cache); |
| |
| /* List of the stacks in use. */ |
| static LIST_HEAD (stack_used); |
| |
| /* We need to record what list operations we are going to do so that, |
| in case of an asynchronous interruption due to a fork() call, we |
| can correct for the work. */ |
| static uintptr_t in_flight_stack; |
| |
| /* List of the threads with user provided stacks in use. No need to |
| initialize this, since it's done in __pthread_initialize_minimal. */ |
| list_t __stack_user __attribute__ ((nocommon)); |
| hidden_data_def (__stack_user) |
| |
| #if COLORING_INCREMENT != 0 |
| /* Number of threads created. */ |
| static unsigned int nptl_ncreated; |
| #endif |
| |
| |
| /* Check whether the stack is still used or not. */ |
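| /* The kernel resets the TID to zero through the CLONE_CHILD_CLEARTID |
| futex when the thread exits, so a non-positive TID means the descriptor |
| and its stack can be reused. */ |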
| #define FREE_P(descr) ((descr)->tid <= 0) |
| |
| |
| static void |
| stack_list_del (list_t *elem) |
| { |
| in_flight_stack = (uintptr_t) elem; |
| |
| atomic_write_barrier (); |
| |
| list_del (elem); |
| |
| atomic_write_barrier (); |
| |
| in_flight_stack = 0; |
| } |
| |
| |
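| /* As above, but set the low bit of IN_FLIGHT_STACK so that |
| __reclaim_stacks can tell an interrupted add apart from an interrupted |
| delete and replay the operation after a fork. */ |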
| static void |
| stack_list_add (list_t *elem, list_t *list) |
| { |
| in_flight_stack = (uintptr_t) elem | 1; |
| |
| atomic_write_barrier (); |
| |
| list_add (elem, list); |
| |
| atomic_write_barrier (); |
| |
| in_flight_stack = 0; |
| } |
| |
| |
| /* We create a doubly linked list of all cache entries. Doubly linked |
| because this allows removing entries from the end. */ |
| |
| |
| /* Get a stack frame from the cache. We have to match by size since |
| some blocks might be too small or far too large. */ |
| static struct pthread * |
| get_cached_stack (size_t *sizep, void **memp) |
| { |
| size_t size = *sizep; |
| struct pthread *result = NULL; |
| list_t *entry; |
| |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| /* Search the cache for a matching entry. We search for the |
| smallest stack which has at least the required size. Note that |
| in normal situations the size of all allocated stacks is the |
| same. At the very least there are only a few different sizes. |
| Therefore this loop will exit early most of the time with an |
| exact match. */ |
| list_for_each (entry, &stack_cache) |
| { |
| struct pthread *curr; |
| |
| curr = list_entry (entry, struct pthread, list); |
| if (FREE_P (curr) && curr->stackblock_size >= size) |
| { |
| if (curr->stackblock_size == size) |
| { |
| result = curr; |
| break; |
| } |
| |
| if (result == NULL |
| || result->stackblock_size > curr->stackblock_size) |
| result = curr; |
| } |
| } |
| |
| if (__builtin_expect (result == NULL, 0) |
| /* Make sure the size difference is not excessive. If it is, |
| do not use the block. */ |
| || __builtin_expect (result->stackblock_size > 4 * size, 0)) |
| { |
| /* Release the lock. */ |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| |
| return NULL; |
| } |
| |
| /* Don't allow setxid until cloned. */ |
| result->setxid_futex = -1; |
| |
| /* Dequeue the entry. */ |
| stack_list_del (&result->list); |
| |
| /* And add to the list of stacks in use. */ |
| stack_list_add (&result->list, &stack_used); |
| |
| /* And decrease the cache size. */ |
| stack_cache_actsize -= result->stackblock_size; |
| |
| /* Release the lock early. */ |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| |
| /* Report size and location of the stack to the caller. */ |
| *sizep = result->stackblock_size; |
| *memp = result->stackblock; |
| |
| /* Cancellation handling is back to the default. */ |
| result->cancelhandling = 0; |
| result->cleanup = NULL; |
| |
| /* No pending event. */ |
| result->nextevent = NULL; |
| |
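| /* The DTV of the cached descriptor may still refer to dynamic TLS of |
| the previous owner, so it is cleared here and the static TLS is |
| re-initialized for the new thread. */ |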
| /* Clear the DTV. */ |
| dtv_t *dtv = GET_DTV (TLS_TPADJ (result)); |
| _dl_clear_dtv (dtv); |
| |
| /* Re-initialize the TLS. */ |
| _dl_allocate_tls_init (TLS_TPADJ (result)); |
| |
| return result; |
| } |
| |
| |
| /* Free stacks until cache size is lower than LIMIT. */ |
| void |
| __free_stacks (size_t limit) |
| { |
| /* We reduce the size of the cache. Remove the last entries until |
| the size is below the limit. */ |
| list_t *entry; |
| list_t *prev; |
| |
| /* Search from the end of the list. */ |
| list_for_each_prev_safe (entry, prev, &stack_cache) |
| { |
| struct pthread *curr; |
| |
| curr = list_entry (entry, struct pthread, list); |
| if (FREE_P (curr)) |
| { |
| /* Unlink the block. */ |
| stack_list_del (entry); |
| |
| /* Account for the freed memory. */ |
| stack_cache_actsize -= curr->stackblock_size; |
| |
| /* Free the memory associated with the ELF TLS. */ |
| _dl_deallocate_tls (TLS_TPADJ (curr), false); |
| |
| /* Remove this block. This should never fail. If it does |
| something is really wrong. */ |
| if (munmap (curr->stackblock, curr->stackblock_size) != 0) |
| abort (); |
| |
| /* Maybe we have freed enough. */ |
| if (stack_cache_actsize <= limit) |
| break; |
| } |
| } |
| } |
| |
| |
| /* Add a stack which is not used anymore to the cache. Must be |
| called with the cache lock held. */ |
| static inline void |
| __attribute ((always_inline)) |
| queue_stack (struct pthread *stack) |
| { |
| /* We unconditionally add the stack to the list. The memory may |
| still be in use but it will not be reused until the kernel marks |
| the stack as not used anymore. */ |
| stack_list_add (&stack->list, &stack_cache); |
| |
| stack_cache_actsize += stack->stackblock_size; |
| if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0)) |
| __free_stacks (stack_cache_maxsize); |
| } |
| |
| |
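| /* Grant read, write, and execute permission on the usable portion of the |
| stack described by PD, leaving the guard area untouched. Returns zero |
| on success or an errno value on failure. */ |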
| static int |
| internal_function |
| change_stack_perm (struct pthread *pd |
| #ifdef NEED_SEPARATE_REGISTER_STACK |
| , size_t pagemask |
| #endif |
| ) |
| { |
| #ifdef NEED_SEPARATE_REGISTER_STACK |
| void *stack = (pd->stackblock |
| + (((((pd->stackblock_size - pd->guardsize) / 2) |
| & pagemask) + pd->guardsize) & pagemask)); |
| size_t len = pd->stackblock + pd->stackblock_size - stack; |
| #elif _STACK_GROWS_DOWN |
| void *stack = pd->stackblock + pd->guardsize; |
| size_t len = pd->stackblock_size - pd->guardsize; |
| #elif _STACK_GROWS_UP |
| void *stack = pd->stackblock; |
| size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock; |
| #else |
| # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP" |
| #endif |
| if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) |
| return errno; |
| |
| return 0; |
| } |
| |
| |
| /* Returns a usable stack for a new thread either by allocating a |
| new stack or reusing a cached stack of sufficient size. |
| ATTR must be non-NULL and point to a valid pthread_attr. |
| PDP must be non-NULL. */ |
| static int |
| allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, |
| ALLOCATE_STACK_PARMS) |
| { |
| struct pthread *pd; |
| size_t size; |
| size_t pagesize_m1 = __getpagesize () - 1; |
| void *stacktop; |
| |
| assert (powerof2 (pagesize_m1 + 1)); |
| assert (TCB_ALIGNMENT >= STACK_ALIGN); |
| |
| /* Get the stack size from the attribute if it is set. Otherwise we |
| use the default we determined at start time. */ |
| if (attr->stacksize != 0) |
| size = attr->stacksize; |
| else |
| { |
| lll_lock (__default_pthread_attr_lock, LLL_PRIVATE); |
| size = __default_pthread_attr.stacksize; |
| lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE); |
| } |
| |
| /* Get memory for the stack. */ |
| if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0)) |
| { |
| uintptr_t adj; |
| |
| /* If the user also specified the size of the stack make sure it |
| is large enough. */ |
| if (attr->stacksize != 0 |
| && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK)) |
| return EINVAL; |
| |
| /* Adjust stack size for alignment of the TLS block. */ |
| #if TLS_TCB_AT_TP |
| adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE) |
| & __static_tls_align_m1; |
| assert (size > adj + TLS_TCB_SIZE); |
| #elif TLS_DTV_AT_TP |
| adj = ((uintptr_t) attr->stackaddr - __static_tls_size) |
| & __static_tls_align_m1; |
| assert (size > adj); |
| #endif |
| |
| /* The user provided some memory. Let's hope it matches the |
| size... We do not allocate guard pages if the user provided |
| the stack. It is the user's responsibility to do this if it |
| is wanted. */ |
| #if TLS_TCB_AT_TP |
| pd = (struct pthread *) ((uintptr_t) attr->stackaddr |
| - TLS_TCB_SIZE - adj); |
| #elif TLS_DTV_AT_TP |
| pd = (struct pthread *) (((uintptr_t) attr->stackaddr |
| - __static_tls_size - adj) |
| - TLS_PRE_TCB_SIZE); |
| #endif |
| |
| /* The thread descriptor placed in the user-provided stack memory needs to be cleared. */ |
| memset (pd, '\0', sizeof (struct pthread)); |
| |
| /* The first TSD block is included in the TCB. */ |
| pd->specific[0] = pd->specific_1stblock; |
| |
| /* Remember the stack-related values. */ |
| pd->stackblock = (char *) attr->stackaddr - size; |
| pd->stackblock_size = size; |
| |
| /* This is a user-provided stack. It will not be queued in the |
| stack cache nor will the memory (except the TLS memory) be freed. */ |
| pd->user_stack = true; |
| |
| /* This is at least the second thread. */ |
| pd->header.multiple_threads = 1; |
| #ifndef TLS_MULTIPLE_THREADS_IN_TCB |
| __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1; |
| #endif |
| |
| #ifndef __ASSUME_PRIVATE_FUTEX |
| /* The thread must know when private futexes are supported. */ |
| pd->header.private_futex = THREAD_GETMEM (THREAD_SELF, |
| header.private_futex); |
| #endif |
| |
| #ifdef NEED_DL_SYSINFO |
| /* Copy the sysinfo value from the parent. */ |
| THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO; |
| #endif |
| |
| /* The process ID is also the same as that of the caller. */ |
| pd->pid = THREAD_GETMEM (THREAD_SELF, pid); |
| |
| /* Don't allow setxid until cloned. */ |
| pd->setxid_futex = -1; |
| |
| /* Allocate the DTV for this thread. */ |
| if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL) |
| { |
| /* Something went wrong. */ |
| assert (errno == ENOMEM); |
| return errno; |
| } |
| |
| |
| /* Prepare to modify global data. */ |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| /* And add to the list of threads with user-provided stacks. */ |
| list_add (&pd->list, &__stack_user); |
| |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| } |
| else |
| { |
| /* Allocate some anonymous memory. If possible use the cache. */ |
| size_t guardsize; |
| size_t reqsize; |
| void *mem; |
| const int prot = (PROT_READ | PROT_WRITE |
| | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0)); |
| |
| #if COLORING_INCREMENT != 0 |
| /* Add one more page for stack coloring. Don't do it for stacks |
| with 16 times pagesize or larger. This might just cause |
| unnecessary misalignment. */ |
| if (size <= 16 * pagesize_m1) |
| size += pagesize_m1 + 1; |
| #endif |
| |
| /* Adjust the stack size for alignment. */ |
| size &= ~__static_tls_align_m1; |
| assert (size != 0); |
| |
| /* Make sure the size of the stack is enough for the guard, the |
| static TLS block, and the thread descriptor. */ |
| guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1; |
| if (__builtin_expect (size < ((guardsize + __static_tls_size |
| + MINIMAL_REST_STACK + pagesize_m1) |
| & ~pagesize_m1), |
| 0)) |
| /* The stack is too small (or the guard too large). */ |
| return EINVAL; |
| |
| /* Try to get a stack from the cache. */ |
| reqsize = size; |
| pd = get_cached_stack (&size, &mem); |
| if (pd == NULL) |
| { |
| /* To avoid aliasing effects on a larger scale than pages we |
| adjust the allocated stack size if necessary. This way |
| allocations directly following each other will not have |
| aliasing problems. */ |
| #if MULTI_PAGE_ALIASING != 0 |
| if ((size % MULTI_PAGE_ALIASING) == 0) |
| size += pagesize_m1 + 1; |
| #endif |
| |
| mem = mmap (NULL, size, prot, |
| MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); |
| |
| if (__builtin_expect (mem == MAP_FAILED, 0)) |
| return errno; |
| |
| /* SIZE is guaranteed to be greater than zero. |
| So we can never get a null pointer back from mmap. */ |
| assert (mem != NULL); |
| |
| #if COLORING_INCREMENT != 0 |
| /* Atomically increment NCREATED. */ |
| unsigned int ncreated = atomic_increment_val (&nptl_ncreated); |
| |
| /* We choose the offset for coloring by incrementing it for |
| every new thread by a fixed amount. The offset is used |
| modulo the page size. Even if coloring would be better |
| relative to higher alignment values it makes no sense to |
| do it since the mmap() interface does not allow us to |
| specify any alignment for the returned memory block. */ |
| size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1; |
| |
| /* Make sure the coloring offset does not disturb the alignment |
| of the TCB and static TLS block. */ |
| if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0)) |
| coloring = (((coloring + __static_tls_align_m1) |
| & ~(__static_tls_align_m1)) |
| & ~pagesize_m1); |
| #else |
| /* Unless specified we do not make any adjustments. */ |
| # define coloring 0 |
| #endif |
| |
| /* Place the thread descriptor at the end of the stack. */ |
| #if TLS_TCB_AT_TP |
| pd = (struct pthread *) ((char *) mem + size - coloring) - 1; |
| #elif TLS_DTV_AT_TP |
| pd = (struct pthread *) ((((uintptr_t) mem + size - coloring |
| - __static_tls_size) |
| & ~__static_tls_align_m1) |
| - TLS_PRE_TCB_SIZE); |
| #endif |
| |
| /* Remember the stack-related values. */ |
| pd->stackblock = mem; |
| pd->stackblock_size = size; |
| |
| /* We allocated the first block of the thread-specific data array. |
| This address will not change for the lifetime of this |
| descriptor. */ |
| pd->specific[0] = pd->specific_1stblock; |
| |
| /* This is at least the second thread. */ |
| pd->header.multiple_threads = 1; |
| #ifndef TLS_MULTIPLE_THREADS_IN_TCB |
| __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1; |
| #endif |
| |
| #ifndef __ASSUME_PRIVATE_FUTEX |
| /* The thread must know when private futexes are supported. */ |
| pd->header.private_futex = THREAD_GETMEM (THREAD_SELF, |
| header.private_futex); |
| #endif |
| |
| #ifdef NEED_DL_SYSINFO |
| /* Copy the sysinfo value from the parent. */ |
| THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO; |
| #endif |
| |
| /* Don't allow setxid until cloned. */ |
| pd->setxid_futex = -1; |
| |
| /* The process ID is also the same as that of the caller. */ |
| pd->pid = THREAD_GETMEM (THREAD_SELF, pid); |
| |
| /* Allocate the DTV for this thread. */ |
| if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL) |
| { |
| /* Something went wrong. */ |
| assert (errno == ENOMEM); |
| |
| /* Free the stack memory we just allocated. */ |
| (void) munmap (mem, size); |
| |
| return errno; |
| } |
| |
| |
| /* Prepare to modify global data. */ |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| /* And add to the list of stacks in use. */ |
| stack_list_add (&pd->list, &stack_used); |
| |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| |
| |
| /* There might have been a race. Another thread might have |
| caused the stacks to get exec permission while this new |
| stack was prepared. Detect if this was possible and |
| change the permission if necessary. */ |
| if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0 |
| && (prot & PROT_EXEC) == 0, 0)) |
| { |
| int err = change_stack_perm (pd |
| #ifdef NEED_SEPARATE_REGISTER_STACK |
| , ~pagesize_m1 |
| #endif |
| ); |
| if (err != 0) |
| { |
| /* Free the stack memory we just allocated. */ |
| (void) munmap (mem, size); |
| |
| return err; |
| } |
| } |
| |
| |
| /* Note that all of the stack and the thread descriptor are |
| zeroed. This means we do not have to initialize fields |
| with initial value zero. This is specifically true for |
| the 'tid' field which is always set back to zero once the |
| stack is not used anymore and for the 'guardsize' field |
| which will be read next. */ |
| } |
| |
| /* Create or resize the guard area if necessary. */ |
| if (__builtin_expect (guardsize > pd->guardsize, 0)) |
| { |
| #ifdef NEED_SEPARATE_REGISTER_STACK |
| char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1); |
| #elif _STACK_GROWS_DOWN |
| char *guard = mem; |
| #elif _STACK_GROWS_UP |
| char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1); |
| #endif |
| if (mprotect (guard, guardsize, PROT_NONE) != 0) |
| { |
| mprot_error: |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| /* Remove the thread from the list. */ |
| stack_list_del (&pd->list); |
| |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| |
| /* Get rid of the TLS block we allocated. */ |
| _dl_deallocate_tls (TLS_TPADJ (pd), false); |
| |
| /* Free the stack memory regardless of whether the size |
| of the cache is over the limit or not. If this piece |
| of memory caused problems we had better not use it |
| anymore. Also, we ignore possible errors here; there |
| is nothing we could do. */ |
| (void) munmap (mem, size); |
| |
| return errno; |
| } |
| /* The call to madvise(...MADV_DONTNEED) below will fail if pages |
| are locked. Unlock the guard memory to make sure the |
| madvise function succeeds (memory can be locked if |
| process called mlockall(MCL_FUTURE) at some point in the |
| past).*/ |
| munlock (guard, guardsize); |
| /* We've marked this guard region unwritable, but it's |
| possible it already became resident, the most common case |
| being transparent hugepages; if stack + guard (+ adjacent |
| mmap regions) were more than 2MB, the kernel might have |
| filled in a full hugepage when we touched the thread |
| descriptor above. This can waste a lot of memory; mark |
| the guard unused. This shouldn't fail, but if it does, |
| we can't really do anything about it so ignore it. */ |
| madvise (guard, guardsize, MADV_DONTNEED); |
| |
| pd->guardsize = guardsize; |
| } |
| else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize, |
| 0)) |
| { |
| /* The old guard area is too large. */ |
| |
| #ifdef NEED_SEPARATE_REGISTER_STACK |
| char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1); |
| char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1); |
| |
| if (oldguard < guard |
| && mprotect (oldguard, guard - oldguard, prot) != 0) |
| goto mprot_error; |
| |
| if (mprotect (guard + guardsize, |
| oldguard + pd->guardsize - guard - guardsize, |
| prot) != 0) |
| goto mprot_error; |
| #elif _STACK_GROWS_DOWN |
| if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize, |
| prot) != 0) |
| goto mprot_error; |
| #elif _STACK_GROWS_UP |
| if (mprotect ((char *) pd - pd->guardsize, |
| pd->guardsize - guardsize, prot) != 0) |
| goto mprot_error; |
| #endif |
| |
| pd->guardsize = guardsize; |
| } |
| /* pthread_getattr_np() needs to report the size requested in |
| the attribute, regardless of how large the actually used |
| guard size is. */ |
| pd->reported_guardsize = guardsize; |
| } |
| |
| /* Initialize the lock. We have to do this unconditionally since the |
| stillborn thread could be canceled while the lock is taken. */ |
| pd->lock = LLL_LOCK_INITIALIZER; |
| |
| /* The robust mutex lists also need to be initialized |
| unconditionally because the cleanup for the previous stack owner |
| might have happened in the kernel. */ |
| pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock) |
| - offsetof (pthread_mutex_t, |
| __data.__list.__next)); |
| pd->robust_head.list_op_pending = NULL; |
| #ifdef __PTHREAD_MUTEX_HAVE_PREV |
| pd->robust_prev = &pd->robust_head; |
| #endif |
| pd->robust_head.list = &pd->robust_head; |
| |
| /* We place the thread descriptor at the end of the stack. */ |
| *pdp = pd; |
| |
| #if TLS_TCB_AT_TP |
| /* The stack begins before the TCB and the static TLS block. */ |
| stacktop = ((char *) (pd + 1) - __static_tls_size); |
| #elif TLS_DTV_AT_TP |
| stacktop = (char *) (pd - 1); |
| #endif |
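| /* For stacks that grow downwards, STACKTOP is the highest usable stack |
| address: the thread descriptor and the static TLS block lie above it |
| and must not be clobbered. The values stored through the output |
| parameters below reach create_thread via STACK_VARIABLES_ARGS. */ |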
| |
| #ifdef NEED_SEPARATE_REGISTER_STACK |
| *stack = pd->stackblock; |
| *stacksize = stacktop - *stack; |
| #elif _STACK_GROWS_DOWN |
| *stack = stacktop; |
| #elif _STACK_GROWS_UP |
| *stack = pd->stackblock; |
| assert (*stack > 0); |
| #endif |
| |
| return 0; |
| } |
| |
| |
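| /* Return the stack of the terminated thread PD for reuse: kernel- |
| allocated (mmap'ed) stacks go back into the cache, while for user- |
| provided stacks only the associated TLS memory is freed. */ |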
| void |
| internal_function |
| __deallocate_stack (struct pthread *pd) |
| { |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| /* Remove the thread from whichever list it is on (the in-use list |
| or the list of threads with user-defined stacks). */ |
| stack_list_del (&pd->list); |
| |
| /* Not much to do. Just free the mmap()ed memory. Note that we do |
| not reset the 'used' flag in the 'tid' field. This is done by |
| the kernel. If no thread has been created yet this field is |
| still zero. */ |
| if (__builtin_expect (! pd->user_stack, 1)) |
| (void) queue_stack (pd); |
| else |
| /* Free the memory associated with the ELF TLS. */ |
| _dl_deallocate_tls (TLS_TPADJ (pd), false); |
| |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| } |
| |
| |
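| /* Make the main thread's stack and all thread stacks (in use as well as |
| cached) executable, typically because a loaded object requires an |
| executable stack. */ |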
| int |
| internal_function |
| __make_stacks_executable (void **stack_endp) |
| { |
| /* First the main thread's stack. */ |
| int err = _dl_make_stack_executable (stack_endp); |
| if (err != 0) |
| return err; |
| |
| #ifdef NEED_SEPARATE_REGISTER_STACK |
| const size_t pagemask = ~(__getpagesize () - 1); |
| #endif |
| |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| list_t *runp; |
| list_for_each (runp, &stack_used) |
| { |
| err = change_stack_perm (list_entry (runp, struct pthread, list) |
| #ifdef NEED_SEPARATE_REGISTER_STACK |
| , pagemask |
| #endif |
| ); |
| if (err != 0) |
| break; |
| } |
| |
| /* Also change the permission for the currently unused stacks. This |
| might be wasted time but better spend it here than adding a check |
| in the fast path. */ |
| if (err == 0) |
| list_for_each (runp, &stack_cache) |
| { |
| err = change_stack_perm (list_entry (runp, struct pthread, list) |
| #ifdef NEED_SEPARATE_REGISTER_STACK |
| , pagemask |
| #endif |
| ); |
| if (err != 0) |
| break; |
| } |
| |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| |
| return err; |
| } |
| |
| |
| /* In case of a fork() call the memory allocation in the child will be |
| the same but only one thread is running. All stacks except that of |
| the one running thread are not used anymore. We have to recycle |
| them. */ |
| void |
| __reclaim_stacks (void) |
| { |
| struct pthread *self = (struct pthread *) THREAD_SELF; |
| |
| /* No locking necessary. The calling thread is the only one running. But |
| we have to be aware that we might have interrupted a list |
| operation. */ |
| |
| if (in_flight_stack != 0) |
| { |
| bool add_p = in_flight_stack & 1; |
| list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1); |
| |
| if (add_p) |
| { |
| /* We always add at the beginning of the list. So in this |
| case we only need to check the beginning of these lists. */ |
| int check_list (list_t *l) |
| { |
| if (l->next->prev != l) |
| { |
| assert (l->next->prev == elem); |
| |
| elem->next = l->next; |
| elem->prev = l; |
| l->next = elem; |
| |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| if (check_list (&stack_used) == 0) |
| (void) check_list (&stack_cache); |
| } |
| else |
| { |
| /* We can simply always replay the delete operation. */ |
| elem->next->prev = elem->prev; |
| elem->prev->next = elem->next; |
| } |
| } |
| |
| /* Mark all stacks except the still running one as free. */ |
| list_t *runp; |
| list_for_each (runp, &stack_used) |
| { |
| struct pthread *curp = list_entry (runp, struct pthread, list); |
| if (curp != self) |
| { |
| /* This marks the stack as free. */ |
| curp->tid = 0; |
| |
| /* The PID field must be initialized for the new process. */ |
| curp->pid = self->pid; |
| |
| /* Account for the size of the stack. */ |
| stack_cache_actsize += curp->stackblock_size; |
| |
| if (curp->specific_used) |
| { |
| /* Clear the thread-specific data. */ |
| memset (curp->specific_1stblock, '\0', |
| sizeof (curp->specific_1stblock)); |
| |
| curp->specific_used = false; |
| |
| for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt) |
| if (curp->specific[cnt] != NULL) |
| { |
| memset (curp->specific[cnt], '\0', |
| sizeof (curp->specific_1stblock)); |
| |
| /* We have allocated the block which we do not |
| free here so re-set the bit. */ |
| curp->specific_used = true; |
| } |
| } |
| } |
| } |
| |
| /* Reset the PIDs in any cached stacks. */ |
| list_for_each (runp, &stack_cache) |
| { |
| struct pthread *curp = list_entry (runp, struct pthread, list); |
| curp->pid = self->pid; |
| } |
| |
| /* Add the stacks of all running threads to the cache. */ |
| list_splice (&stack_used, &stack_cache); |
| |
| /* Remove the entry for the current thread from the cache list and |
| add it back to the appropriate in-use list; which of the two |
| lists is used is decided by the user_stack flag. */ |
| stack_list_del (&self->list); |
| |
| /* Re-initialize the lists for all the threads. */ |
| INIT_LIST_HEAD (&stack_used); |
| INIT_LIST_HEAD (&__stack_user); |
| |
| if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0)) |
| list_add (&self->list, &__stack_user); |
| else |
| list_add (&self->list, &stack_used); |
| |
| /* There is one thread running. */ |
| __nptl_nthreads = 1; |
| |
| in_flight_stack = 0; |
| |
| /* Initialize locks. */ |
| stack_cache_lock = LLL_LOCK_INITIALIZER; |
| __default_pthread_attr_lock = LLL_LOCK_INITIALIZER; |
| } |
| |
| |
| #if HP_TIMING_AVAIL |
| # undef __find_thread_by_id |
| /* Find a thread given the thread ID. */ |
| attribute_hidden |
| struct pthread * |
| __find_thread_by_id (pid_t tid) |
| { |
| struct pthread *result = NULL; |
| |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| /* Iterate over the list with system-allocated threads first. */ |
| list_t *runp; |
| list_for_each (runp, &stack_used) |
| { |
| struct pthread *curp; |
| |
| curp = list_entry (runp, struct pthread, list); |
| |
| if (curp->tid == tid) |
| { |
| result = curp; |
| goto out; |
| } |
| } |
| |
| /* Now the list with threads using user-allocated stacks. */ |
| list_for_each (runp, &__stack_user) |
| { |
| struct pthread *curp; |
| |
| curp = list_entry (runp, struct pthread, list); |
| |
| if (curp->tid == tid) |
| { |
| result = curp; |
| goto out; |
| } |
| } |
| |
| out: |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| |
| return result; |
| } |
| #endif |
| |
| |
| static void |
| internal_function |
| setxid_mark_thread (struct xid_command *cmdp, struct pthread *t) |
| { |
| int ch; |
| |
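| /* A SETXID_FUTEX value of -1 means the thread has not been cloned yet. |
| Register our interest by changing it to -2 and wait until the creating |
| thread releases the futex once the clone has happened. */ |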
| /* Wait until this thread is cloned. */ |
| if (t->setxid_futex == -1 |
| && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1)) |
| do |
| lll_futex_wait (&t->setxid_futex, -2, LLL_PRIVATE); |
| while (t->setxid_futex == -2); |
| |
| /* Don't let the thread exit before the setxid handler runs. */ |
| t->setxid_futex = 0; |
| |
| do |
| { |
| ch = t->cancelhandling; |
| |
| /* If the thread is exiting right now, ignore it. */ |
| if ((ch & EXITING_BITMASK) != 0) |
| { |
| /* Release the futex if there is no other setxid in |
| progress. */ |
| if ((ch & SETXID_BITMASK) == 0) |
| { |
| t->setxid_futex = 1; |
| lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE); |
| } |
| return; |
| } |
| } |
| while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling, |
| ch | SETXID_BITMASK, ch)); |
| } |
| |
| |
| static void |
| internal_function |
| setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t) |
| { |
| int ch; |
| |
| do |
| { |
| ch = t->cancelhandling; |
| if ((ch & SETXID_BITMASK) == 0) |
| return; |
| } |
| while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling, |
| ch & ~SETXID_BITMASK, ch)); |
| |
| /* Release the futex just in case. */ |
| t->setxid_futex = 1; |
| lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE); |
| } |
| |
| |
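| /* Send SIGSETXID to thread T if it has been marked. Returns 1 and bumps |
| the reply counter if the signal was delivered, 0 otherwise. */ |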
| static int |
| internal_function |
| setxid_signal_thread (struct xid_command *cmdp, struct pthread *t) |
| { |
| if ((t->cancelhandling & SETXID_BITMASK) == 0) |
| return 0; |
| |
| int val; |
| INTERNAL_SYSCALL_DECL (err); |
| val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid), |
| t->tid, SIGSETXID); |
| |
| /* If this failed, the thread must not have started yet or must already have exited. */ |
| if (!INTERNAL_SYSCALL_ERROR_P (val, err)) |
| { |
| atomic_increment (&cmdp->cntr); |
| return 1; |
| } |
| else |
| return 0; |
| } |
| |
| |
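| /* Perform the set*id operation described by CMDP process-wide: mark every |
| other thread, signal each with SIGSETXID so it runs the syscall itself, |
| wait for all of them, and finally perform the syscall in the calling |
| thread. */ |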
| int |
| attribute_hidden |
| __nptl_setxid (struct xid_command *cmdp) |
| { |
| int signalled; |
| int result; |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| __xidcmd = cmdp; |
| cmdp->cntr = 0; |
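| /* CNTR counts the threads signalled below; each signalled thread |
| decrements it again from its SIGSETXID handler, and the wait loop |
| further down blocks until the count drops back to zero. */ |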
| |
| struct pthread *self = THREAD_SELF; |
| |
| /* Iterate over the list with system-allocated threads first. */ |
| list_t *runp; |
| list_for_each (runp, &stack_used) |
| { |
| struct pthread *t = list_entry (runp, struct pthread, list); |
| if (t == self) |
| continue; |
| |
| setxid_mark_thread (cmdp, t); |
| } |
| |
| /* Now the list with threads using user-allocated stacks. */ |
| list_for_each (runp, &__stack_user) |
| { |
| struct pthread *t = list_entry (runp, struct pthread, list); |
| if (t == self) |
| continue; |
| |
| setxid_mark_thread (cmdp, t); |
| } |
| |
| /* Iterate until we don't succeed in signalling anyone. That means |
| we have gotten all running threads, and their children will be |
| automatically correct once started. */ |
| do |
| { |
| signalled = 0; |
| |
| list_for_each (runp, &stack_used) |
| { |
| struct pthread *t = list_entry (runp, struct pthread, list); |
| if (t == self) |
| continue; |
| |
| signalled += setxid_signal_thread (cmdp, t); |
| } |
| |
| list_for_each (runp, &__stack_user) |
| { |
| struct pthread *t = list_entry (runp, struct pthread, list); |
| if (t == self) |
| continue; |
| |
| signalled += setxid_signal_thread (cmdp, t); |
| } |
| |
| int cur = cmdp->cntr; |
| while (cur != 0) |
| { |
| lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE); |
| cur = cmdp->cntr; |
| } |
| } |
| while (signalled != 0); |
| |
| /* Clean up flags, so that no thread blocks during exit waiting |
| for a signal which will never come. */ |
| list_for_each (runp, &stack_used) |
| { |
| struct pthread *t = list_entry (runp, struct pthread, list); |
| if (t == self) |
| continue; |
| |
| setxid_unmark_thread (cmdp, t); |
| } |
| |
| list_for_each (runp, &__stack_user) |
| { |
| struct pthread *t = list_entry (runp, struct pthread, list); |
| if (t == self) |
| continue; |
| |
| setxid_unmark_thread (cmdp, t); |
| } |
| |
| /* This must be last, otherwise the current thread might no longer have |
| permission to send the SIGSETXID signal to the other threads. */ |
| INTERNAL_SYSCALL_DECL (err); |
| result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3, |
| cmdp->id[0], cmdp->id[1], cmdp->id[2]); |
| if (INTERNAL_SYSCALL_ERROR_P (result, err)) |
| { |
| __set_errno (INTERNAL_SYSCALL_ERRNO (result, err)); |
| result = -1; |
| } |
| |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| return result; |
| } |
| |
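| /* Initialize the static TLS block of MAP in thread CURP's static TLS |
| area and make CURP's DTV entry for the module point at it. */ |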
| static inline void __attribute__((always_inline)) |
| init_one_static_tls (struct pthread *curp, struct link_map *map) |
| { |
| dtv_t *dtv = GET_DTV (TLS_TPADJ (curp)); |
| # if TLS_TCB_AT_TP |
| void *dest = (char *) curp - map->l_tls_offset; |
| # elif TLS_DTV_AT_TP |
| void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE; |
| # else |
| # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" |
| # endif |
| |
| /* Initialize the memory. */ |
| memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size), |
| '\0', map->l_tls_blocksize - map->l_tls_initimage_size); |
| |
| /* Fill in the DTV slot so that a later LD/GD access will find it. */ |
| dtv[map->l_tls_modid].pointer.is_static = true; |
| /* Pairs with the read barrier in tls_get_addr_tail, guaranteeing |
| that any thread waiting for an update to pointer.val sees the |
| initimage write. */ |
| atomic_write_barrier (); |
| dtv[map->l_tls_modid].pointer.val = dest; |
| } |
| |
| void |
| attribute_hidden |
| __pthread_init_static_tls (struct link_map *map) |
| { |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| /* Iterate over the list with system-allocated threads first. */ |
| list_t *runp; |
| list_for_each (runp, &stack_used) |
| init_one_static_tls (list_entry (runp, struct pthread, list), map); |
| |
| /* Now the list with threads using user-allocated stacks. */ |
| list_for_each (runp, &__stack_user) |
| init_one_static_tls (list_entry (runp, struct pthread, list), map); |
| |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| } |
| |
| |
| void |
| attribute_hidden |
| __wait_lookup_done (void) |
| { |
| lll_lock (stack_cache_lock, LLL_PRIVATE); |
| |
| struct pthread *self = THREAD_SELF; |
| |
| /* Iterate over the list with system-allocated threads first. */ |
| list_t *runp; |
| list_for_each (runp, &stack_used) |
| { |
| struct pthread *t = list_entry (runp, struct pthread, list); |
| if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) |
| continue; |
| |
| int *const gscope_flagp = &t->header.gscope_flag; |
| |
| /* We have to wait until this thread is done with the global |
| scope. First tell the thread that we are waiting and |
| possibly have to be woken. */ |
| if (atomic_compare_and_exchange_bool_acq (gscope_flagp, |
| THREAD_GSCOPE_FLAG_WAIT, |
| THREAD_GSCOPE_FLAG_USED)) |
| continue; |
| |
| do |
| lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE); |
| while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); |
| } |
| |
| /* Now the list with threads using user-allocated stacks. */ |
| list_for_each (runp, &__stack_user) |
| { |
| struct pthread *t = list_entry (runp, struct pthread, list); |
| if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) |
| continue; |
| |
| int *const gscope_flagp = &t->header.gscope_flag; |
| |
| /* We have to wait until this thread is done with the global |
| scope. First tell the thread that we are waiting and |
| possibly have to be woken. */ |
| if (atomic_compare_and_exchange_bool_acq (gscope_flagp, |
| THREAD_GSCOPE_FLAG_WAIT, |
| THREAD_GSCOPE_FLAG_USED)) |
| continue; |
| |
| do |
| lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE); |
| while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); |
| } |
| |
| lll_unlock (stack_cache_lock, LLL_PRIVATE); |
| } |