| /* Copyright (C) 2003-2018 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Lesser General Public |
| License as published by the Free Software Foundation; either |
| version 2.1 of the License, or (at your option) any later version. |
| |
| The GNU C Library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public |
| License along with the GNU C Library; if not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include <endian.h> |
| #include <errno.h> |
| #include <sysdep.h> |
| #include <futex-internal.h> |
| #include <pthread.h> |
| #include <pthreadP.h> |
| #include <sys/time.h> |
| #include <atomic.h> |
| #include <stdint.h> |
| #include <stdbool.h> |
| |
| #include <shlib-compat.h> |
| #include <stap-probe.h> |
| #include <time.h> |
| |
| #include "pthread_cond_common.c" |
| |
| |
struct _condvar_cleanup_buffer
{
  uint64_t wseq;          /* Waiter sequence position; the LSB is the
                             group index.  */
  pthread_cond_t *cond;   /* The condvar we registered with.  */
  pthread_mutex_t *mutex; /* The mutex to re-acquire on cancellation.  */
  int private;            /* Whether the condvar is process-private.  */
};
| |
| |
| /* Decrease the waiter reference count. */ |
| static void |
| __condvar_confirm_wakeup (pthread_cond_t *cond, int private) |
| { |
  /* If destruction is pending (i.e., the wake-request flag is nonzero) and we
     are the last waiter (the reference count in the prior value of __wrefs
     was 1 << 3), then wake any threads waiting in pthread_cond_destroy.
     Release MO to synchronize with these threads.  Don't bother clearing the
     wake-up request flag.  */
| if ((atomic_fetch_add_release (&cond->__data.__wrefs, -8) >> 2) == 3) |
| futex_wake (&cond->__data.__wrefs, INT_MAX, private); |
| } |
| |
| |
| /* Cancel waiting after having registered as a waiter previously. SEQ is our |
| position and G is our group index. |
| The goal of cancellation is to make our group smaller if that is still |
| possible. If we are in a closed group, this is not possible anymore; in |
| this case, we need to send a replacement signal for the one we effectively |
| consumed because the signal should have gotten consumed by another waiter |
| instead; we must not both cancel waiting and consume a signal. |
| |
| Must not be called while still holding a reference on the group. |
| |
| Returns true iff we consumed a signal. |
| |
| On some kind of timeouts, we may be able to pretend that a signal we |
| effectively consumed happened before the timeout (i.e., similarly to first |
| spinning on signals before actually checking whether the timeout has |
| passed already). Doing this would allow us to skip sending a replacement |
| signal, but this case might happen rarely because the end of the timeout |
| must race with someone else sending a signal. Therefore, we don't bother |
| trying to optimize this. */ |
| static void |
| __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, |
| int private) |
| { |
| bool consumed_signal = false; |
| |
  /* No deadlock with group switching is possible here because we do not
     hold a reference on the group.  */
| __condvar_acquire_lock (cond, private); |
| |
| uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; |
| if (g1_start > seq) |
| { |
| /* Our group is closed, so someone provided enough signals for it. |
| Thus, we effectively consumed a signal. */ |
| consumed_signal = true; |
| } |
| else |
| { |
| if (g1_start + __condvar_get_orig_size (cond) <= seq) |
| { |
          /* We are in the current G2 and thus cannot have consumed a signal.
             Reduce its effective size or handle overflow.  Remember that in
             G2, the size is zero or an effectively negative value
             represented through unsigned overflow.  */
| if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0) |
| { |
| cond->__data.__g_size[g]--; |
| } |
| else |
| { |
| /* Cancellations would overflow the maximum group size. Just |
| wake up everyone spuriously to create a clean state. This |
| also means we do not consume a signal someone else sent. */ |
| __condvar_release_lock (cond, private); |
| __pthread_cond_broadcast (cond); |
| return; |
| } |
| } |
| else |
| { |
          /* We are in the current G1.  If the group's size is zero, someone
             put a signal in the group that nobody else but us can consume.  */
| if (cond->__data.__g_size[g] == 0) |
| consumed_signal = true; |
| else |
| { |
| /* Otherwise, we decrease the size of the group. This is |
| equivalent to atomically putting in a signal just for us and |
| consuming it right away. We do not consume a signal sent |
| by someone else. We also cannot have consumed a futex |
| wake-up because if we were cancelled or timed out in a futex |
| call, the futex will wake another waiter. */ |
| cond->__data.__g_size[g]--; |
| } |
| } |
| } |
| |
| __condvar_release_lock (cond, private); |
| |
| if (consumed_signal) |
| { |
| /* We effectively consumed a signal even though we didn't want to. |
| Therefore, we need to send a replacement signal. |
| If we would want to optimize this, we could do what |
| pthread_cond_signal does right in the critical section above. */ |
| __pthread_cond_signal (cond); |
| } |
| } |
| |
/* Decrease the group reference count, and wake up any signalers that might
   be waiting for the group to quiesce.  */
| static void |
| __condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private) |
| { |
| /* Release MO to synchronize-with the acquire load in |
| __condvar_quiesce_and_switch_g1. */ |
| if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3) |
| { |
| /* Clear the wake-up request flag before waking up. We do not need more |
| than relaxed MO and it doesn't matter if we apply this for an aliased |
| group because we wake all futex waiters right after clearing the |
| flag. */ |
| atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1); |
| futex_wake (cond->__data.__g_refs + g, INT_MAX, private); |
| } |
| } |
| |
| /* Clean-up for cancellation of waiters waiting for normal signals. We cancel |
| our registration as a waiter, confirm we have woken up, and re-acquire the |
| mutex. */ |
| static void |
| __condvar_cleanup_waiting (void *arg) |
| { |
| struct _condvar_cleanup_buffer *cbuffer = |
| (struct _condvar_cleanup_buffer *) arg; |
| pthread_cond_t *cond = cbuffer->cond; |
| unsigned g = cbuffer->wseq & 1; |
| |
| __condvar_dec_grefs (cond, g, cbuffer->private); |
| |
| __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private); |
| /* FIXME With the current cancellation implementation, it is possible that |
| a thread is cancelled after it has returned from a syscall. This could |
     result in a cancelled waiter consuming a futex wake-up that then causes
     another waiter in the same group to not wake up.  To work around
| this issue until we have fixed cancellation, just add a futex wake-up |
| conservatively. */ |
| futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private); |
| |
| __condvar_confirm_wakeup (cond, cbuffer->private); |
| |
| /* XXX If locking the mutex fails, should we just stop execution? This |
| might be better than silently ignoring the error. */ |
| __pthread_mutex_cond_lock (cbuffer->mutex); |
| } |
| |
| /* This condvar implementation guarantees that all calls to signal and |
   broadcast, and the three virtually atomic parts of each call to wait
| (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re- |
| acquiring the mutex) happen in some total order that is consistent with the |
| happens-before relations in the calling program. However, this order does |
| not necessarily result in additional happens-before relations being |
| established (which aligns well with spurious wake-ups being allowed). |
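
   For reference, this is the canonical usage pattern that these guarantees
   (and the permission for spurious wake-ups) are designed around, with
   hypothetical mutex m, condvar c, and a user-defined predicate; re-checking
   the predicate makes any spurious wake-up harmless:

     pthread_mutex_lock (&m);
     while (!predicate)
       pthread_cond_wait (&c, &m);
     (use the state protected by m)
     pthread_mutex_unlock (&m);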
| |
| All waiters acquire a certain position in a 64b waiter sequence (__wseq). |
| This sequence determines which waiters are allowed to consume signals. |
   A broadcast is equivalent to sending as many signals as there are
   unblocked waiters.
| When a signal arrives, it samples the current value of __wseq with a |
| relaxed-MO load (i.e., the position the next waiter would get). (This is |
| sufficient because it is consistent with happens-before; the caller can |
| enforce stronger ordering constraints by calling signal while holding the |
| mutex.) Only waiters with a position less than the __wseq value observed |
| by the signal are eligible to consume this signal. |
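
   As an example of this eligibility rule: if a signal samples a
   waiter-sequence position of 5 (i.e., the position the next waiter would
   acquire), then waiters at positions 0 through 4 may consume that signal,
   whereas a waiter that acquires position 5 afterwards may not.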
| |
   This would be straightforward to implement if waiters just spun, but we
   need to let them block using futexes.  Futexes give no guarantee of
| waking in FIFO order, so we cannot reliably wake eligible waiters if we |
| just use a single futex. Also, futex words are 32b in size, but we need |
| to distinguish more than 1<<32 states because we need to represent the |
| order of wake-up (and thus which waiters are eligible to consume signals); |
| blocking in a futex is not atomic with a waiter determining its position in |
| the waiter sequence, so we need the futex word to reliably notify waiters |
| that they should not attempt to block anymore because they have been |
| already signaled in the meantime. While an ABA issue on a 32b value will |
| be rare, ignoring it when we are aware of it is not the right thing to do |
| either. |
| |
| Therefore, we use a 64b counter to represent the waiter sequence (on |
| architectures which only support 32b atomics, we use a few bits less). |
| To deal with the blocking using futexes, we maintain two groups of waiters: |
| * Group G1 consists of waiters that are all eligible to consume signals; |
| incoming signals will always signal waiters in this group until all |
| waiters in G1 have been signaled. |
| * Group G2 consists of waiters that arrive when a G1 is present and still |
| contains waiters that have not been signaled. When all waiters in G1 |
| are signaled and a new signal arrives, the new signal will convert G2 |
| into the new G1 and create a new G2 for future waiters. |
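
   As an example of this group life cycle: waiters W1 and W2 arrive and form
   G2; a signal arriving when G1 has no unsignaled waiters left converts
   W1/W2's group into the new G1 (whose waiters are now eligible to consume
   signals) and creates a fresh, empty G2; a waiter W3 arriving afterwards
   joins the new G2 and cannot consume any signal until a later switch turns
   that G2 into the next G1.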
| |
| We cannot allocate new memory because of process-shared condvars, so we |
| have just two slots of groups that change their role between G1 and G2. |
| Each has a separate futex word, a number of signals available for |
| consumption, a size (number of waiters in the group that have not been |
| signaled), and a reference count. |
| |
| The group reference count is used to maintain the number of waiters that |
| are using the group's futex. Before a group can change its role, the |
| reference count must show that no waiters are using the futex anymore; this |
| prevents ABA issues on the futex word. |
| |
| To represent which intervals in the waiter sequence the groups cover (and |
| thus also which group slot contains G1 or G2), we use a 64b counter to |
| designate the start position of G1 (inclusive), and a single bit in the |
| waiter sequence counter to represent which group slot currently contains |
| G2. This allows us to switch group roles atomically wrt. waiters obtaining |
| a position in the waiter sequence. The G1 start position allows waiters to |
| figure out whether they are in a group that has already been completely |
   signaled (i.e., if the current G1 starts at a later position than the
| waiter's position). Waiters cannot determine whether they are currently |
   in G2 or G1 -- but they do not have to because all they are interested in
   is whether there are available signals, and they always start in G2 (whose
   group slot they know because of the bit in the waiter sequence).  Signalers
| will simply fill the right group until it is completely signaled and can |
   be closed (they do not switch group roles until they really have to, to
| decrease the likelihood of having to wait for waiters still holding a |
| reference on the now-closed G1). |
| |
| Signalers maintain the initial size of G1 to be able to determine where |
| G2 starts (G2 is always open-ended until it becomes G1). They track the |
| remaining size of a group; when waiters cancel waiting (due to PThreads |
| cancellation or timeouts), they will decrease this remaining size as well. |
| |
| To implement condvar destruction requirements (i.e., that |
| pthread_cond_destroy can be called as soon as all waiters have been |
| signaled), waiters increment a reference count before starting to wait and |
| decrement it after they stopped waiting but right before they acquire the |
| mutex associated with the condvar. |
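
   For reference, a sketch of the destruction pattern this supports (with
   hypothetical mutex m, condvar c, and flag finished): once the broadcast
   below has signaled all waiters, the destroy call is permitted even though
   the waiters may not have re-acquired m yet:

     pthread_mutex_lock (&m);
     finished = true;
     pthread_cond_broadcast (&c);
     pthread_mutex_unlock (&m);
     pthread_cond_destroy (&c);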
| |
   pthread_cond_t thus consists of the following fields (some of them also
   carry flag bits that are not part of the field's primary value; these
   flags are needed to make some operations atomic, or live there because
   there was no space for them elsewhere in the data structure):
| |
| __wseq: Waiter sequence counter |
| * LSB is index of current G2. |
   * Waiters fetch-add while having acquired the mutex associated with the
| condvar. Signalers load it and fetch-xor it concurrently. |
| __g1_start: Starting position of G1 (inclusive) |
| * LSB is index of current G2. |
| * Modified by signalers while having acquired the condvar-internal lock |
| and observed concurrently by waiters. |
| __g1_orig_size: Initial size of G1 |
| * The two least-significant bits represent the condvar-internal lock. |
| * Only accessed while having acquired the condvar-internal lock. |
| __wrefs: Waiter reference counter. |
| * Bit 2 is true if waiters should run futex_wake when they remove the |
| last reference. pthread_cond_destroy uses this as futex word. |
| * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC). |
| * Bit 0 is true iff this is a process-shared condvar. |
| * Simple reference count used by both waiters and pthread_cond_destroy. |
| (If the format of __wrefs is changed, update nptl_lock_constants.pysym |
| and the pretty printers.) |
| For each of the two groups, we have: |
| __g_refs: Futex waiter reference count. |
| * LSB is true if waiters should run futex_wake when they remove the |
| last reference. |
| * Reference count used by waiters concurrently with signalers that have |
| acquired the condvar-internal lock. |
| __g_signals: The number of signals that can still be consumed. |
| * Used as a futex word by waiters. Used concurrently by waiters and |
| signalers. |
| * LSB is true iff this group has been completely signaled (i.e., it is |
| closed). |
| __g_size: Waiters remaining in this group (i.e., which have not been |
     signaled yet).
| * Accessed by signalers and waiters that cancel waiting (both do so only |
       when having acquired the condvar-internal lock).
| * The size of G2 is always zero because it cannot be determined until |
| the group becomes G1. |
| * Although this is of unsigned type, we rely on using unsigned overflow |
| rules to make this hold effectively negative values too (in |
| particular, when waiters in G2 cancel waiting). |
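
   For illustration, this is how the waiter code below decodes the flag bits
   of __wrefs when registering (a restatement using the helpers this file
   actually uses; monotonic is a hypothetical name):

     unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8);
     int private = __condvar_get_private (flags);               (from bit 0)
     int monotonic
       = (flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0;    (bit 1)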
| |
| A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields |
| a condvar that has G2 starting at position 0 and a G1 that is closed. |
| |
| Because waiters do not claim ownership of a group right when obtaining a |
| position in __wseq but only reference count the group when using futexes |
| to block, it can happen that a group gets closed before a waiter can |
| increment the reference count. Therefore, waiters have to check whether |
| their group is already closed using __g1_start. They also have to perform |
| this check when spinning when trying to grab a signal from __g_signals. |
| Note that for these checks, using relaxed MO to load __g1_start is |
| sufficient because if a waiter can see a sufficiently large value, it could |
   have also consumed a signal in the waiter's group.
| |
| Waiters try to grab a signal from __g_signals without holding a reference |
| count, which can lead to stealing a signal from a more recent group after |
| their own group was already closed. They cannot always detect whether they |
   in fact did so because they do not know when they stole, but they can
| conservatively add a signal back to the group they stole from; if they |
| did so unnecessarily, all that happens is a spurious wake-up. To make this |
   even less likely, __g1_start contains the index of the current G2 too,
   which allows waiters to check if there is aliasing on the group slots; if
| there wasn't, they didn't steal from the current G1, which means that the |
| G1 they stole from must have been already closed and they do not need to |
| fix anything. |
| |
| It is essential that the last field in pthread_cond_t is __g_signals[1]: |
| The previous condvar used a pointer-sized field in pthread_cond_t, so a |
| PTHREAD_COND_INITIALIZER from that condvar implementation might only |
| initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes |
| in total instead of the 48 we need). __g_signals[1] is not accessed before |
| the first group switch (G2 starts at index 0), which will set its value to |
| zero after a harmless fetch-or whose return value is ignored. This |
| effectively completes initialization. |
| |
| |
| Limitations: |
| * This condvar isn't designed to allow for more than |
| __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait. |
| * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not |
| supported. |
| * Beyond what is allowed as errors by POSIX or documented, we can also |
| return the following errors: |
| * EPERM if MUTEX is a recursive mutex and the caller doesn't own it. |
| * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes. Unlike |
| for other errors, this can happen when we re-acquire the mutex; this |
| isn't allowed by POSIX (which requires all errors to virtually happen |
| before we release the mutex or change the condvar state), but there's |
| nothing we can do really. |
| * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors |
| returned by __pthread_tpp_change_priority. We will already have |
| released the mutex in such cases, so the caller cannot expect to own |
| MUTEX. |
| |
| Other notes: |
| * Instead of the normal mutex unlock / lock functions, we use |
| __pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m) |
| because those will not change the mutex-internal users count, so that it |
| can be detected when a condvar is still associated with a particular |
| mutex because there is a waiter blocked on this condvar using this mutex. |
| */ |
| static __always_inline int |
| __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, |
| const struct timespec *abstime) |
| { |
| const int maxspin = 0; |
| int err; |
| int result = 0; |
| |
| LIBC_PROBE (cond_wait, 2, cond, mutex); |
| |
| /* Acquire a position (SEQ) in the waiter sequence (WSEQ). We use an |
| atomic operation because signals and broadcasts may update the group |
| switch without acquiring the mutex. We do not need release MO here |
| because we do not need to establish any happens-before relation with |
| signalers (see __pthread_cond_signal); modification order alone |
| establishes a total order of waiters/signals. We do need acquire MO |
| to synchronize with group reinitialization in |
| __condvar_quiesce_and_switch_g1. */ |
| uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2); |
| /* Find our group's index. We always go into what was G2 when we acquired |
| our position. */ |
| unsigned int g = wseq & 1; |
| uint64_t seq = wseq >> 1; |
| |
| /* Increase the waiter reference count. Relaxed MO is sufficient because |
| we only need to synchronize when decrementing the reference count. */ |
| unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8); |
| int private = __condvar_get_private (flags); |
| |
| /* Now that we are registered as a waiter, we can release the mutex. |
| Waiting on the condvar must be atomic with releasing the mutex, so if |
| the mutex is used to establish a happens-before relation with any |
| signaler, the waiter must be visible to the latter; thus, we release the |
| mutex after registering as waiter. |
| If releasing the mutex fails, we just cancel our registration as a |
| waiter and confirm that we have woken up. */ |
| err = __pthread_mutex_unlock_usercnt (mutex, 0); |
| if (__glibc_unlikely (err != 0)) |
| { |
| __condvar_cancel_waiting (cond, seq, g, private); |
| __condvar_confirm_wakeup (cond, private); |
| return err; |
| } |
| |
| /* Now wait until a signal is available in our group or it is closed. |
| Acquire MO so that if we observe a value of zero written after group |
| switching in __condvar_quiesce_and_switch_g1, we synchronize with that |
| store and will see the prior update of __g1_start done while switching |
| groups too. */ |
| unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); |
| |
| do |
| { |
| while (1) |
| { |
| /* Spin-wait first. |
| Note that spinning first without checking whether a timeout |
| passed might lead to what looks like a spurious wake-up even |
| though we should return ETIMEDOUT (e.g., if the caller provides |
| an absolute timeout that is clearly in the past). However, |
| (1) spurious wake-ups are allowed, (2) it seems unlikely that a |
| user will (ab)use pthread_cond_wait as a check for whether a |
| point in time is in the past, and (3) spinning first without |
| having to compare against the current time seems to be the right |
| choice from a performance perspective for most use cases. */ |
| unsigned int spin = maxspin; |
| while (signals == 0 && spin > 0) |
| { |
| /* Check that we are not spinning on a group that's already |
| closed. */ |
| if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) |
| goto done; |
| |
| /* TODO Back off. */ |
| |
| /* Reload signals. See above for MO. */ |
| signals = atomic_load_acquire (cond->__data.__g_signals + g); |
| spin--; |
| } |
| |
| /* If our group will be closed as indicated by the flag on signals, |
| don't bother grabbing a signal. */ |
| if (signals & 1) |
| goto done; |
| |
| /* If there is an available signal, don't block. */ |
| if (signals != 0) |
| break; |
| |
| /* No signals available after spinning, so prepare to block. |
| We first acquire a group reference and use acquire MO for that so |
| that we synchronize with the dummy read-modify-write in |
| __condvar_quiesce_and_switch_g1 if we read from that. In turn, |
| in this case this will make us see the closed flag on __g_signals |
| that designates a concurrent attempt to reuse the group's slot. |
| We use acquire MO for the __g_signals check to make the |
| __g1_start check work (see spinning above). |
| Note that the group reference acquisition will not mask the |
| release MO when decrementing the reference count because we use |
| an atomic read-modify-write operation and thus extend the release |
| sequence. */ |
| atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); |
| if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0) |
| || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))) |
| { |
| /* Our group is closed. Wake up any signalers that might be |
| waiting. */ |
| __condvar_dec_grefs (cond, g, private); |
| goto done; |
| } |
| |
| // Now block. |
| struct _pthread_cleanup_buffer buffer; |
| struct _condvar_cleanup_buffer cbuffer; |
| cbuffer.wseq = wseq; |
| cbuffer.cond = cond; |
| cbuffer.mutex = mutex; |
| cbuffer.private = private; |
| __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); |
| |
| if (abstime == NULL) |
| { |
| /* Block without a timeout. */ |
| err = futex_wait_cancelable ( |
| cond->__data.__g_signals + g, 0, private); |
| } |
| else |
| { |
| /* Block, but with a timeout. |
| Work around the fact that the kernel rejects negative timeout |
| values despite them being valid. */ |
| if (__glibc_unlikely (abstime->tv_sec < 0)) |
| err = ETIMEDOUT; |
| |
| else |
| { |
| err = futex_abstimed_wait_cancelable |
| (cond->__data.__g_signals + g, 0, abstime, |
| (flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0 |
| ? 0 |
| : FUTEX_CLOCK_REALTIME, private); |
| } |
| } |
| |
| __pthread_cleanup_pop (&buffer, 0); |
| |
| if (__glibc_unlikely (err == ETIMEDOUT)) |
| { |
| __condvar_dec_grefs (cond, g, private); |
| /* If we timed out, we effectively cancel waiting. Note that |
| we have decremented __g_refs before cancellation, so that a |
| deadlock between waiting for quiescence of our group in |
| __condvar_quiesce_and_switch_g1 and us trying to acquire |
| the lock during cancellation is not possible. */ |
| __condvar_cancel_waiting (cond, seq, g, private); |
| result = ETIMEDOUT; |
| goto done; |
| } |
| else |
| __condvar_dec_grefs (cond, g, private); |
| |
| /* Reload signals. See above for MO. */ |
| signals = atomic_load_acquire (cond->__data.__g_signals + g); |
| } |
| |
| } |
| /* Try to grab a signal. Use acquire MO so that we see an up-to-date value |
| of __g1_start below (see spinning above for a similar case). In |
| particular, if we steal from a more recent group, we will also see a |
| more recent __g1_start below. */ |
| while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, |
| &signals, signals - 2)); |
| |
| /* We consumed a signal but we could have consumed from a more recent group |
| that aliased with ours due to being in the same group slot. If this |
     might be the case, our group must be closed as visible through
| __g1_start. */ |
| uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); |
| if (seq < (g1_start >> 1)) |
| { |
| /* We potentially stole a signal from a more recent group but we do not |
| know which group we really consumed from. |
| We do not care about groups older than current G1 because they are |
| closed; we could have stolen from these, but then we just add a |
| spurious wake-up for the current groups. |
| We will never steal a signal from current G2 that was really intended |
| for G2 because G2 never receives signals (until it becomes G1). We |
| could have stolen a signal from G2 that was conservatively added by a |
| previous waiter that also thought it stole a signal -- but given that |
| that signal was added unnecessarily, it's not a problem if we steal |
| it. |
| Thus, the remaining case is that we could have stolen from the current |
| G1, where "current" means the __g1_start value we observed. However, |
| if the current G1 does not have the same slot index as we do, we did |
| not steal from it and do not need to undo that. This is the reason |
	 for putting a bit with G2's index into __g1_start as well.  */
| if (((g1_start & 1) ^ 1) == g) |
| { |
| /* We have to conservatively undo our potential mistake of stealing |
| a signal. We can stop trying to do that when the current G1 |
| changes because other spinning waiters will notice this too and |
| __condvar_quiesce_and_switch_g1 has checked that there are no |
| futex waiters anymore before switching G1. |
| Relaxed MO is fine for the __g1_start load because we need to |
| merely be able to observe this fact and not have to observe |
| something else as well. |
| ??? Would it help to spin for a little while to see whether the |
| current G1 gets closed? This might be worthwhile if the group is |
| small or close to being closed. */ |
| unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g); |
| while (__condvar_load_g1_start_relaxed (cond) == g1_start) |
| { |
| /* Try to add a signal. We don't need to acquire the lock |
| because at worst we can cause a spurious wake-up. If the |
| group is in the process of being closed (LSB is true), this |
| has an effect similar to us adding a signal. */ |
| if (((s & 1) != 0) |
| || atomic_compare_exchange_weak_relaxed |
| (cond->__data.__g_signals + g, &s, s + 2)) |
| { |
| /* If we added a signal, we also need to add a wake-up on |
| the futex. We also need to do that if we skipped adding |
| a signal because the group is being closed because |
| while __condvar_quiesce_and_switch_g1 could have closed |
		     the group, it might still be waiting for futex waiters to
		     leave (and one of those waiters might be the one we stole
		     the signal from, which would cause it to block using the
| futex). */ |
| futex_wake (cond->__data.__g_signals + g, 1, private); |
| break; |
| } |
| /* TODO Back off. */ |
| } |
| } |
| } |
| |
| done: |
| |
| /* Confirm that we have been woken. We do that before acquiring the mutex |
| to allow for execution of pthread_cond_destroy while having acquired the |
| mutex. */ |
| __condvar_confirm_wakeup (cond, private); |
| |
| /* Woken up; now re-acquire the mutex. If this doesn't fail, return RESULT, |
     which is set to ETIMEDOUT if a timeout occurred, or zero otherwise.  */
| err = __pthread_mutex_cond_lock (mutex); |
| /* XXX Abort on errors that are disallowed by POSIX? */ |
| return (err != 0) ? err : result; |
| } |
| |
| |
| /* See __pthread_cond_wait_common. */ |
| int |
| __pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex) |
| { |
| return __pthread_cond_wait_common (cond, mutex, NULL); |
| } |
| |
| /* See __pthread_cond_wait_common. */ |
| int |
| __pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex, |
| const struct timespec *abstime) |
| { |
| /* Check parameter validity. This should also tell the compiler that |
| it can assume that abstime is not NULL. */ |
| if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) |
| return EINVAL; |
| return __pthread_cond_wait_common (cond, mutex, abstime); |
| } |
| |
| versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait, |
| GLIBC_2_3_2); |
| versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait, |
| GLIBC_2_3_2); |