No public description

PiperOrigin-RevId: 812894538
Change-Id: I280fc3638c18270e8f5db563bcdbdb035e67cc2f
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/benchtests/pow-inputs b/google3/third_party/grte/v5_src/glibc-2.27/benchtests/pow-inputs
index 78f8ac7..4a51aac 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/benchtests/pow-inputs
+++ b/google3/third_party/grte/v5_src/glibc-2.27/benchtests/pow-inputs
@@ -302,8 +302,7 @@
 0x1.c004d2256a5b8p402, -0x1.a01df480fdcb7p98
 0x1.52b9d41aaa1e9p-589, -0x1.292cb15f1459dp46
 -0x1.ea9ca6fa0919ep-279, -0x1.601e44b6a588cp40
-# pow slow path at 240 bits
-# Implemented in sysdeps/ieee754/dbl-64/slowpow.c
+# old pow slow path at 240 bits
 ## name: 240bits
 0x1.01fcd33493ea3p596, -0x1.724bd4e887783p-14
 0x1.032ff59ab34fdp-540, -0x1.61e3632080b87p-24
@@ -405,8 +404,7 @@
 0x1.fae913d4f952ep-809, -0x1.4b649402fce63p-6
 0x1.fe6d725408f24p484, -0x1.25f4f6441d2e4p-12
 0x1.ff6393f9150ccp-718, 0x1.a0cb50a9bf2f3p-31
-# pow slowest path at 768 bits
-# Implemented in sysdeps/ieee754/dbl-64/slowpow.c
+# old pow slowest path at 768 bits
 ## name: 768bits
 1.0000000000000020, 1.5
 0x1.006777b4b61dep843, -0x1.67e3145491872p-1
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/config.make.in b/google3/third_party/grte/v5_src/glibc-2.27/config.make.in
index d967eaa..bf3d07e 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/config.make.in
+++ b/google3/third_party/grte/v5_src/glibc-2.27/config.make.in
@@ -74,6 +74,7 @@
 use-default-link = @use_default_link@
 have-cxx-thread_local = @libc_cv_cxx_thread_local@
 have-loop-to-function = @libc_cv_cc_loop_to_function@
+have-textrel_ifunc = @libc_cv_textrel_ifunc@
 
 multi-arch = @multi_arch@
 
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/configure b/google3/third_party/grte/v5_src/glibc-2.27/configure
index 4f7b42a..9bee3fd 100755
--- a/google3/third_party/grte/v5_src/glibc-2.27/configure
+++ b/google3/third_party/grte/v5_src/glibc-2.27/configure
@@ -663,6 +663,7 @@
 sysnames
 submachine
 multi_arch
+libc_cv_textrel_ifunc
 no_stack_protector
 stack_protector
 libc_cv_ssp
@@ -4073,6 +4074,56 @@
 config_vars="$config_vars
 have-gnu-retain = $libc_cv_gnu_retain"
 
+# Check if linker supports textrel relocation with ifunc (used on elf/tests).
+# Note that it relies on libc_cv_ld_gnu_indirect_function test above.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the linker supports textrels along with ifunc" >&5
+$as_echo_n "checking whether the linker supports textrels along with ifunc... " >&6; }
+if ${libc_cv_textrel_ifunc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.S <<EOF
+.type foo,%gnu_indirect_function
+foo:
+.globl _start
+_start:
+.globl __start
+__start:
+.data
+#ifdef _LP64
+.quad foo
+#else
+.long foo
+#endif
+.text
+.globl address
+address:
+#ifdef _LP64
+.quad address
+#else
+.long address
+#endif
+EOF
+libc_cv_textrel_ifunc=no
+if test "$with_lld" = yes; then
+  LLD_TEXTREL_FLAG="-Wl,-z,notext" # lld by default disallows textrels.
+else
+  LLD_TEXTREL_FLAG=""
+fi
+if test "$libc_cv_ld_gnu_indirect_function" = yes; then
+   if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $LLD_TEXTREL_FLAG -nostartfiles -nostdlib $no_ssp -pie -o conftest conftest.S'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+     libc_cv_textrel_ifunc=yes
+   fi
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_textrel_ifunc" >&5
+$as_echo "$libc_cv_textrel_ifunc" >&6; }
+
 # Check if gcc warns about alias for function with incompatible types.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking if compiler warns about alias for function with incompatible types" >&5
 $as_echo_n "checking if compiler warns about alias for function with incompatible types... " >&6; }
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/configure.ac b/google3/third_party/grte/v5_src/glibc-2.27/configure.ac
index c0638a1..c70b78d 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/configure.ac
+++ b/google3/third_party/grte/v5_src/glibc-2.27/configure.ac
@@ -687,6 +687,46 @@
 fi
 LIBC_CONFIG_VAR([have-gnu-retain], [$libc_cv_gnu_retain])
 
+# Check if linker supports textrel relocation with ifunc (used on elf/tests).
+# Note that it relies on libc_cv_ld_gnu_indirect_function test above.
+AC_CACHE_CHECK([whether the linker supports textrels along with ifunc],
+               libc_cv_textrel_ifunc, [dnl
+cat > conftest.S <<EOF
+.type foo,%gnu_indirect_function
+foo:
+.globl _start
+_start:
+.globl __start
+__start:
+.data
+#ifdef _LP64
+.quad foo
+#else
+.long foo
+#endif
+.text
+.globl address
+address:
+#ifdef _LP64
+.quad address
+#else
+.long address
+#endif
+EOF
+libc_cv_textrel_ifunc=no
+if test "$with_lld" = yes; then
+  LLD_TEXTREL_FLAG="-Wl,-z,notext" # lld by default disallows textrels.
+else
+  LLD_TEXTREL_FLAG=""
+fi
+if test "$libc_cv_ld_gnu_indirect_function" = yes; then
+   if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $LLD_TEXTREL_FLAG -nostartfiles -nostdlib $no_ssp -pie -o conftest conftest.S); then
+     libc_cv_textrel_ifunc=yes
+   fi
+fi
+rm -f conftest*])
+AC_SUBST(libc_cv_textrel_ifunc)
+
 # Check if gcc warns about alias for function with incompatible types.
 AC_CACHE_CHECK([if compiler warns about alias for function with incompatible types],
 	       libc_cv_gcc_incompatible_alias, [dnl
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/elf/Makefile b/google3/third_party/grte/v5_src/glibc-2.27/elf/Makefile
index 7b44740..f15695f 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/elf/Makefile
+++ b/google3/third_party/grte/v5_src/glibc-2.27/elf/Makefile
@@ -390,6 +390,9 @@
 ifeq (yes,$(have-fpie))
 ifunc-pie-tests = ifuncmain1pie ifuncmain1vispie ifuncmain1staticpie \
 		  ifuncmain5pie ifuncmain6pie ifuncmain7pie
+ifeq (yes,$(have-textrel_ifunc))
+ifunc-pie-tests += tst-ifunc-textrel
+endif
 tests-internal += $(ifunc-pie-tests)
 tests-pie += $(ifunc-pie-tests)
 endif
@@ -1298,6 +1301,7 @@
 CFLAGS-ifuncmain5pie.c += $(pie-ccflag)
 CFLAGS-ifuncmain6pie.c += $(pie-ccflag)
 CFLAGS-ifuncmain7pie.c += $(pie-ccflag)
+CFLAGS-tst-ifunc-textrel.c += $(pic-ccflag)
 
 $(objpfx)ifuncmain1pie: $(objpfx)ifuncmod1.so
 $(objpfx)ifuncmain1staticpie: $(objpfx)ifuncdep1pic.o
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/elf/dl-reloc.c b/google3/third_party/grte/v5_src/glibc-2.27/elf/dl-reloc.c
index 9f743f5..e42583a 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/elf/dl-reloc.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/elf/dl-reloc.c
@@ -284,17 +284,6 @@
 	    newp->start = PTR_ALIGN_DOWN (ph->p_vaddr, GLRO(dl_pagesize))
 			  + (caddr_t) l->l_addr;
 
-	    if (__mprotect (newp->start, newp->len, PROT_READ|PROT_WRITE) < 0)
-	      {
-		errstring = N_("cannot make segment writable for relocation");
-	      call_error:
-		_dl_signal_error (errno, l->l_name, NULL, errstring);
-	      }
-
-#if (PF_R | PF_W | PF_X) == 7 && (PROT_READ | PROT_WRITE | PROT_EXEC) == 7
-	    newp->prot = (PF_TO_PROT
-			  >> ((ph->p_flags & (PF_R | PF_W | PF_X)) * 4)) & 0xf;
-#else
 	    newp->prot = 0;
 	    if (ph->p_flags & PF_R)
 	      newp->prot |= PROT_READ;
@@ -302,7 +291,14 @@
 	      newp->prot |= PROT_WRITE;
 	    if (ph->p_flags & PF_X)
 	      newp->prot |= PROT_EXEC;
-#endif
+
+	    if (__mprotect (newp->start, newp->len, newp->prot|PROT_WRITE) < 0)
+	      {
+		errstring = N_("cannot make segment writable for relocation");
+	      call_error:
+		_dl_signal_error (errno, l->l_name, NULL, errstring);
+	      }
+
 	    newp->next = textrels;
 	    textrels = newp;
 	  }
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/elf/tst-ifunc-textrel.c b/google3/third_party/grte/v5_src/glibc-2.27/elf/tst-ifunc-textrel.c
new file mode 100644
index 0000000..d34c4db
--- /dev/null
+++ b/google3/third_party/grte/v5_src/glibc-2.27/elf/tst-ifunc-textrel.c
@@ -0,0 +1,45 @@
+/* Check DT_TEXTREL/DF_TEXTREL support with ifunc.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+
+/* Self-referential constant in .text, forcing a text relocation.  */
+static const uintptr_t
+address __attribute__((section(".text"))) = (uintptr_t) &address;
+
+static uintptr_t
+foo_impl (void)
+{
+  return address;  /* Reads the constant stored in .text.  */
+}
+
+void *
+__attribute__((noinline))
+foo (void)
+{
+  return (void*) foo_impl;
+}
+__asm__ (".type foo, %gnu_indirect_function");  /* Retype foo as an IFUNC.  */
+
+static int
+do_test (void)
+{
+  return (uintptr_t) foo () != 0 ? 0 : 1;  /* 0 iff foo () is nonzero.  */
+}
+
+#include <support/test-driver.c>
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/gmon/gmon.c b/google3/third_party/grte/v5_src/glibc-2.27/gmon/gmon.c
index dee6480..4712d9f 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/gmon/gmon.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/gmon/gmon.c
@@ -132,7 +132,7 @@
   p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
   p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
   p->textsize = p->highpc - p->lowpc;
-  p->kcountsize = ROUNDUP(p->textsize / HISTFRACTION, sizeof(*p->froms));
+  p->kcountsize = ROUNDUP(p->textsize / HISTFRACTION, sizeof(*p->kcount));
   p->hashfraction = HASHFRACTION;
   p->log_hashfraction = -1;
   /* The following test must be kept in sync with the corresponding
@@ -142,7 +142,7 @@
 	 instead of integer division.  Precompute shift amount. */
       p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1;
   }
-  p->fromssize = p->textsize / HASHFRACTION;
+  p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
   p->tolimit = p->textsize * ARCDENSITY / 100;
   if (p->tolimit < MINARCS)
     p->tolimit = MINARCS;
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/locale/Makefile b/google3/third_party/grte/v5_src/glibc-2.27/locale/Makefile
index fd99722..efcc623 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/locale/Makefile
+++ b/google3/third_party/grte/v5_src/glibc-2.27/locale/Makefile
@@ -74,9 +74,17 @@
 $(objpfx)locale: $(locale-modules:%=$(objpfx)%.o)
 $(objpfx)localedef $(objpfx)locale: $(lib-modules:%=$(objpfx)%.o)
 
-C-translit.h: C-translit.h.in gen-translit.pl
-	$(PERL) gen-translit.pl < $< > $@.tmp
-	mv -f $@.tmp $@
+# The gen-translit.pl tool requires gcc for its preprocessor,
+# which is unavailable on Forge.
+# Since the generated C-translit.h file is already stored in the source tree,
+# we just comment out this rule for now.
+# If we need to regenerate this file, we should pull in the updated version of
+# gen-translit.pl, as it no longer has a dependency on the gcc preprocessor:
+# https://sourceware.org/git/?p=glibc.git;a=commit;h=053c52b17739a584ee73d336e547b15abcdabd49
+#
+# C-translit.h: C-translit.h.in gen-translit.pl
+# 	$(PERL) gen-translit.pl < $< > $@.tmp
+# 	mv -f $@.tmp $@
 
 # The path to the compiled binary locale archive or compiled locales,
 # along with the parent path to the source locales and source
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/malloc/Makefile b/google3/third_party/grte/v5_src/glibc-2.27/malloc/Makefile
index 17873e6..d20c3f1 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/malloc/Makefile
+++ b/google3/third_party/grte/v5_src/glibc-2.27/malloc/Makefile
@@ -230,6 +230,14 @@
 # the test skeleton.
 $(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT
 
+# Clang optimizes away calls to malloc.h functions or otherwise makes
+# assumptions that break GNU libc tests.
+# For instance, `malloc` in glibc affects errno, but clang assumes that errno
+# will be unchanged: https://github.com/llvm/llvm-project/issues/114772.
+ifeq ($(with-clang),yes)
+$(tests:%=$(objpfx)%.o): CFLAGS += -fno-builtin
+endif
+
 $(objpfx)tst-interpose-nothread: $(objpfx)tst-interpose-aux-nothread.o
 $(objpfx)tst-interpose-thread: \
   $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library)
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/math/Makefile b/google3/third_party/grte/v5_src/glibc-2.27/math/Makefile
index ec7d91c..d5872c4 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/math/Makefile
+++ b/google3/third_party/grte/v5_src/glibc-2.27/math/Makefile
@@ -113,9 +113,9 @@
 
 # double support
 type-double-suffix :=
-type-double-routines := branred doasin dosincos halfulp mpa mpatan2	\
+type-double-routines := branred doasin dosincos mpa mpatan2	\
 		       mpatan mpexp mplog mpsqrt mptan sincos32 slowexp	\
-		       slowpow sincostab k_rem_pio2
+		       sincostab k_rem_pio2
 
 # float support
 type-float-suffix := f
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/nptl/Makefile b/google3/third_party/grte/v5_src/glibc-2.27/nptl/Makefile
index fae3c05..dd6ab0c 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/nptl/Makefile
+++ b/google3/third_party/grte/v5_src/glibc-2.27/nptl/Makefile
@@ -547,7 +547,7 @@
 CFLAGS-tst-oncex4.c += -fexceptions
 CFLAGS-tst-align.c += $(stack-align-test-flags)
 CFLAGS-tst-align3.c += $(stack-align-test-flags)
-CFLAGS-tst-initializers1.c += -W -Wall -Werror
+CFLAGS-tst-initializers1.c += -W -Wall -Werror -Wno-error=macro-redefined
 CFLAGS-tst-initializers1-< = $(CFLAGS-tst-initializers1.c) \
 			     $(patsubst tst-initializers1-%.c,-std=%,$<)
 CFLAGS-tst-initializers1-c89.c += $(CFLAGS-tst-initializers1-<)
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/nptl/allocatestack.c b/google3/third_party/grte/v5_src/glibc-2.27/nptl/allocatestack.c
index 1989ca7..e02fe1c 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/nptl/allocatestack.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/nptl/allocatestack.c
@@ -1247,8 +1247,12 @@
   /* Pairs against the read barrier in tls_get_attr_tail, guaranteeing
      any thread waiting for an update to pointer.val sees the
      initimage write.  */
-  atomic_write_barrier ();
-  dtv[map->l_tls_modid].pointer.val = dest;
+
+  /* The barrier and dtv store described above are disabled: they are
+     believed to be unnecessary and cause a use-after-free.  See
+     http://b/369943713 and http://b/72697501.
+       atomic_write_barrier ();
+       dtv[map->l_tls_modid].pointer.val = dest;  */
 }
 
 void
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_broadcast.c b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_broadcast.c
index e6bcb9b..14d3e53 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_broadcast.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_broadcast.c
@@ -58,10 +58,10 @@
     {
       /* Add as many signals as the remaining size of the group.  */
       atomic_fetch_add_relaxed (cond->__data.__g_signals + g1,
-				cond->__data.__g_size[g1] << 1);
+				cond->__data.__g_size[g1]);
       cond->__data.__g_size[g1] = 0;
 
-      /* We need to wake G1 waiters before we quiesce G1 below.  */
+      /* We need to wake G1 waiters before we switch G1 below.  */
       /* TODO Only set it if there are indeed futex waiters.  We could
 	 also try to move this out of the critical section in cases when
 	 G2 is empty (and we don't need to quiesce).  */
@@ -70,11 +70,11 @@
 
   /* G1 is complete.  Step (2) is next unless there are no waiters in G2, in
      which case we can stop.  */
-  if (__condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private))
+  if (__condvar_switch_g1 (cond, wseq, &g1, private))
     {
       /* Step (3): Send signals to all waiters in the old G2 / new G1.  */
       atomic_fetch_add_relaxed (cond->__data.__g_signals + g1,
-				cond->__data.__g_size[g1] << 1);
+				cond->__data.__g_size[g1]);
       cond->__data.__g_size[g1] = 0;
       /* TODO Only set it if there are indeed futex waiters.  */
       do_futex_wake = true;
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_common.c b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_common.c
index 8e425eb..1fe0448 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_common.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_common.c
@@ -329,19 +329,17 @@
     return FUTEX_SHARED;
 }
 
-/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to
-   leave G1, converts G1 into a fresh G2, and then switches group roles so that
-   the former G2 becomes the new G1 ending at the current __wseq value when we
-   eventually make the switch (WSEQ is just an observation of __wseq by the
-   signaler).
+/* This closes G1 (whose index is in G1INDEX), converts G1 into a fresh G2,
+   and then switches group roles so that the former G2 becomes the new G1
+   ending at the current __wseq value when we eventually make the switch
+   (WSEQ is just an observation of __wseq by the signaler).
    If G2 is empty, it will not switch groups because then it would create an
    empty G1 which would require switching groups again on the next signal.
    Returns false iff groups were not switched because G2 was empty.  */
 static bool __attribute__ ((unused))
-__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
+__condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
     unsigned int *g1index, int private)
 {
-  const unsigned int maxspin = 0;
   unsigned int g1 = *g1index;
 
   /* If there is no waiter in G2, we don't do anything.  The expression may
@@ -350,92 +348,23 @@
      behavior.
      Note that this works correctly for a zero-initialized condvar too.  */
   unsigned int old_orig_size = __condvar_get_orig_size (cond);
-  uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
-  if (((unsigned) (wseq - old_g1_start - old_orig_size)
-	  + cond->__data.__g_size[g1 ^ 1]) == 0)
+  uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond);
+  uint64_t new_g1_start = old_g1_start + old_orig_size;
+  if (((unsigned) (wseq - new_g1_start) + cond->__data.__g_size[g1 ^ 1]) == 0)
 	return false;
 
-  /* Now try to close and quiesce G1.  We have to consider the following kinds
-     of waiters:
+  /* We have to consider the following kinds of waiters:
      * Waiters from less recent groups than G1 are not affected because
        nothing will change for them apart from __g1_start getting larger.
      * New waiters arriving concurrently with the group switching will all go
        into G2 until we atomically make the switch.  Waiters existing in G2
        are not affected.
-     * Waiters in G1 will be closed out immediately by setting a flag in
-       __g_signals, which will prevent waiters from blocking using a futex on
-       __g_signals and also notifies them that the group is closed.  As a
-       result, they will eventually remove their group reference, allowing us
-       to close switch group roles.  */
+     * Waiters in G1 have already received a signal and been woken.  */
 
-  /* First, set the closed flag on __g_signals.  This tells waiters that are
-     about to wait that they shouldn't do that anymore.  This basically
-     serves as an advance notificaton of the upcoming change to __g1_start;
-     waiters interpret it as if __g1_start was larger than their waiter
-     sequence position.  This allows us to change __g1_start after waiting
-     for all existing waiters with group references to leave, which in turn
-     makes recovery after stealing a signal simpler because it then can be
-     skipped if __g1_start indicates that the group is closed (otherwise,
-     we would have to recover always because waiters don't know how big their
-     groups are).  Relaxed MO is fine.  */
-  atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1);
-
-  /* Wait until there are no group references anymore.  The fetch-or operation
-     injects us into the modification order of __g_refs; release MO ensures
-     that waiters incrementing __g_refs after our fetch-or see the previous
-     changes to __g_signals and to __g1_start that had to happen before we can
-     switch this G1 and alias with an older group (we have two groups, so
-     aliasing requires switching group roles twice).  Note that nobody else
-     can have set the wake-request flag, so we do not have to act upon it.
-
-     Also note that it is harmless if older waiters or waiters from this G1
-     get a group reference after we have quiesced the group because it will
-     remain closed for them either because of the closed flag in __g_signals
-     or the later update to __g1_start.  New waiters will never arrive here
-     but instead continue to go into the still current G2.  */
-  unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0);
-  while ((r >> 1) > 0)
-    {
-      for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--)
-	{
-	  /* TODO Back off.  */
-	  r = atomic_load_relaxed (cond->__data.__g_refs + g1);
-	}
-      if ((r >> 1) > 0)
-	{
-	  /* There is still a waiter after spinning.  Set the wake-request
-	     flag and block.  Relaxed MO is fine because this is just about
-	     this futex word.  */
-	  r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1);
-
-	  if ((r >> 1) > 0)
-	    futex_wait_simple (cond->__data.__g_refs + g1, r, private);
-	  /* Reload here so we eventually see the most recent value even if we
-	     do not spin.   */
-	  r = atomic_load_relaxed (cond->__data.__g_refs + g1);
-	}
-    }
-  /* Acquire MO so that we synchronize with the release operation that waiters
-     use to decrement __g_refs and thus happen after the waiters we waited
-     for.  */
-  atomic_thread_fence_acquire ();
-
-  /* Update __g1_start, which finishes closing this group.  The value we add
-     will never be negative because old_orig_size can only be zero when we
-     switch groups the first time after a condvar was initialized, in which
-     case G1 will be at index 1 and we will add a value of 1.  See above for
-     why this takes place after waiting for quiescence of the group.
-     Relaxed MO is fine because the change comes with no additional
-     constraints that others would have to observe.  */
-  __condvar_add_g1_start_relaxed (cond,
-      (old_orig_size << 1) + (g1 == 1 ? 1 : - 1));
-
-  /* Now reopen the group, thus enabling waiters to again block using the
-     futex controlled by __g_signals.  Release MO so that observers that see
-     no signals (and thus can block) also see the write __g1_start and thus
-     that this is now a new group (see __pthread_cond_wait_common for the
-     matching acquire MO loads).  */
-  atomic_store_release (cond->__data.__g_signals + g1, 0);
+  /* Update __g1_start, which closes this group.  Relaxed MO is fine because
+     the change comes with no additional constraints that others would have
+     to observe.  */
+  __condvar_add_g1_start_relaxed (cond, old_orig_size);
 
   /* At this point, the old G1 is now a valid new G2 (but not in use yet).
      No old waiter can neither grab a signal nor acquire a reference without
@@ -447,9 +376,13 @@
   g1 ^= 1;
   *g1index ^= 1;
 
+  /* Now advance the new G1 g_signals to the new g1_start, giving it
+     an effective signal count of 0 to start.  */
+  atomic_store_release (cond->__data.__g_signals + g1, (unsigned)new_g1_start);
+
   /* These values are just observed by signalers, and thus protected by the
      lock.  */
-  unsigned int orig_size = wseq - (old_g1_start + old_orig_size);
+  unsigned int orig_size = wseq - new_g1_start;
   __condvar_set_orig_size (cond, orig_size);
   /* Use and addition to not loose track of cancellations in what was
      previously G2.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_signal.c b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_signal.c
index 3db3d1f..9f04833 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_signal.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_signal.c
@@ -70,19 +70,18 @@
   bool do_futex_wake = false;
 
   /* If G1 is still receiving signals, we put the signal there.  If not, we
-     check if G2 has waiters, and if so, quiesce and switch G1 to the former
-     G2; if this results in a new G1 with waiters (G2 might have cancellations
-     already, see __condvar_quiesce_and_switch_g1), we put the signal in the
-     new G1.  */
+     check if G2 has waiters, and if so, switch G1 to the former G2; if this
+     results in a new G1 with waiters (G2 might have cancellations already,
+     see __condvar_switch_g1), we put the signal in the new G1. */
   if ((cond->__data.__g_size[g1] != 0)
-      || __condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private))
+      || __condvar_switch_g1 (cond, wseq, &g1, private))
     {
       /* Add a signal.  Relaxed MO is fine because signaling does not need to
-	 establish a happens-before relation (see above).  We do not mask the
-	 release-MO store when initializing a group in
-	 __condvar_quiesce_and_switch_g1 because we use an atomic
-	 read-modify-write and thus extend that store's release sequence.  */
-      atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2);
+         establish a happens-before relation (see above).  We do not mask the
+         release-MO store when initializing a group in __condvar_switch_g1
+         because we use an atomic read-modify-write and thus extend that
+         store's release sequence.  */
+      atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 1);
       cond->__data.__g_size[g1]--;
       /* TODO Only set it if there are indeed futex waiters.  */
       do_futex_wake = true;
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_wait.c b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_wait.c
index 0dbab42..1f9bd75 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_wait.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_cond_wait.c
@@ -85,7 +85,7 @@
      not hold a reference on the group.  */
   __condvar_acquire_lock (cond, private);
 
-  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
+  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
   if (g1_start > seq)
     {
       /* Our group is closed, so someone provided enough signals for it.
@@ -144,23 +144,6 @@
     }
 }
 
-/* Wake up any signalers that might be waiting.  */
-static void
-__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
-{
-  /* Release MO to synchronize-with the acquire load in
-     __condvar_quiesce_and_switch_g1.  */
-  if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3)
-    {
-      /* Clear the wake-up request flag before waking up.  We do not need more
-	 than relaxed MO and it doesn't matter if we apply this for an aliased
-	 group because we wake all futex waiters right after clearing the
-	 flag.  */
-      atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
-      futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
-    }
-}
-
 /* Clean-up for cancellation of waiters waiting for normal signals.  We cancel
    our registration as a waiter, confirm we have woken up, and re-acquire the
    mutex.  */
@@ -172,8 +155,6 @@
   pthread_cond_t *cond = cbuffer->cond;
   unsigned g = cbuffer->wseq & 1;
 
-  __condvar_dec_grefs (cond, g, cbuffer->private);
-
   __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private);
   /* FIXME With the current cancellation implementation, it is possible that
      a thread is cancelled after it has returned from a syscall.  This could
@@ -239,9 +220,7 @@
    signaled), and a reference count.
 
    The group reference count is used to maintain the number of waiters that
-   are using the group's futex.  Before a group can change its role, the
-   reference count must show that no waiters are using the futex anymore; this
-   prevents ABA issues on the futex word.
+   are using the group's futex.
 
    To represent which intervals in the waiter sequence the groups cover (and
    thus also which group slot contains G1 or G2), we use a 64b counter to
@@ -252,7 +231,7 @@
    figure out whether they are in a group that has already been completely
    signaled (i.e., if the current G1 starts at a later position that the
    waiter's position).  Waiters cannot determine whether they are currently
-   in G2 or G1 -- but they do not have too because all they are interested in
+   in G2 or G1 -- but they do not have to because all they are interested in
    is whether there are available signals, and they always start in G2 (whose
    group slot they know because of the bit in the waiter sequence.  Signalers
    will simply fill the right group until it is completely signaled and can
@@ -281,7 +260,6 @@
      * Waiters fetch-add while having acquire the mutex associated with the
        condvar.  Signalers load it and fetch-xor it concurrently.
    __g1_start: Starting position of G1 (inclusive)
-     * LSB is index of current G2.
      * Modified by signalers while having acquired the condvar-internal lock
        and observed concurrently by waiters.
    __g1_orig_size: Initial size of G1
@@ -301,11 +279,10 @@
        last reference.
      * Reference count used by waiters concurrently with signalers that have
        acquired the condvar-internal lock.
-   __g_signals: The number of signals that can still be consumed.
+   __g_signals: The number of signals that can still be consumed, stored as
+     an offset from g1_start (i.e., the value is g1_start plus that count).
      * Used as a futex word by waiters.  Used concurrently by waiters and
        signalers.
-     * LSB is true iff this group has been completely signaled (i.e., it is
-       closed).
    __g_size: Waiters remaining in this group (i.e., which have not been
      signaled yet.
      * Accessed by signalers and waiters that cancel waiting (both do so only
@@ -329,27 +306,6 @@
    sufficient because if a waiter can see a sufficiently large value, it could
    have also consume a signal in the waiters group.
 
-   Waiters try to grab a signal from __g_signals without holding a reference
-   count, which can lead to stealing a signal from a more recent group after
-   their own group was already closed.  They cannot always detect whether they
-   in fact did because they do not know when they stole, but they can
-   conservatively add a signal back to the group they stole from; if they
-   did so unnecessarily, all that happens is a spurious wake-up.  To make this
-   even less likely, __g1_start contains the index of the current g2 too,
-   which allows waiters to check if there aliasing on the group slots; if
-   there wasn't, they didn't steal from the current G1, which means that the
-   G1 they stole from must have been already closed and they do not need to
-   fix anything.
-
-   It is essential that the last field in pthread_cond_t is __g_signals[1]:
-   The previous condvar used a pointer-sized field in pthread_cond_t, so a
-   PTHREAD_COND_INITIALIZER from that condvar implementation might only
-   initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes
-   in total instead of the 48 we need).  __g_signals[1] is not accessed before
-   the first group switch (G2 starts at index 0), which will set its value to
-   zero after a harmless fetch-or whose return value is ignored.  This
-   effectively completes initialization.
-
 
    Limitations:
    * This condvar isn't designed to allow for more than
@@ -380,7 +336,6 @@
 __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
     const struct timespec *abstime)
 {
-  const int maxspin = 0;
   int err;
   int result = 0;
 
@@ -392,8 +347,7 @@
      because we do not need to establish any happens-before relation with
      signalers (see __pthread_cond_signal); modification order alone
      establishes a total order of waiters/signals.  We do need acquire MO
-     to synchronize with group reinitialization in
-     __condvar_quiesce_and_switch_g1.  */
+     to synchronize with group reinitialization in __condvar_switch_g1.  */
   uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
   /* Find our group's index.  We always go into what was G2 when we acquired
      our position.  */
@@ -420,199 +374,84 @@
       return err;
     }
 
-  /* Now wait until a signal is available in our group or it is closed.
-     Acquire MO so that if we observe a value of zero written after group
-     switching in __condvar_quiesce_and_switch_g1, we synchronize with that
-     store and will see the prior update of __g1_start done while switching
-     groups too.  */
-  unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
-
-  do
+  while (1)
     {
-      while (1)
-	{
-	  /* Spin-wait first.
-	     Note that spinning first without checking whether a timeout
-	     passed might lead to what looks like a spurious wake-up even
-	     though we should return ETIMEDOUT (e.g., if the caller provides
-	     an absolute timeout that is clearly in the past).  However,
-	     (1) spurious wake-ups are allowed, (2) it seems unlikely that a
-	     user will (ab)use pthread_cond_wait as a check for whether a
-	     point in time is in the past, and (3) spinning first without
-	     having to compare against the current time seems to be the right
-	     choice from a performance perspective for most use cases.  */
-	  unsigned int spin = maxspin;
-	  while (signals == 0 && spin > 0)
-	    {
-	      /* Check that we are not spinning on a group that's already
-		 closed.  */
-	      if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
-		goto done;
+      /* Now wait until a signal is available in our group or it is closed.
+         Acquire MO so that if we observe (signals == lowseq) after group
+         switching in __condvar_switch_g1, we synchronize with that store and
+         will see the prior update of __g1_start done while switching groups
+         too.  */
+      unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
+      uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
 
-	      /* TODO Back off.  */
+      if (seq < g1_start)
+        {
+          /* If the group is closed already,
+             then this waiter originally had enough extra signals to
+             consume, up until the time its group was closed.  */
+           break;
+        }
 
-	      /* Reload signals.  See above for MO.  */
-	      signals = atomic_load_acquire (cond->__data.__g_signals + g);
-	      spin--;
-	    }
+      /* If there is an available signal, don't block.
+         If __g1_start has advanced at all, then we must be in G1
+         by now, perhaps in the process of switching back to an older
+         G2, but in either case we're allowed to consume the available
+         signal and should not block anymore.  */
+      if ((int)(signals - (unsigned int)g1_start) > 0)
+        {
+          /* Try to grab a signal.  See above for MO.  (If we do another loop
+             iteration, we need to see the correct value of g1_start.)  */
+	        if (atomic_compare_exchange_weak_acquire (
+                      cond->__data.__g_signals + g,
+                      &signals, signals - 1))
+            break;
+          else
+            continue;
+        }
 
-	  /* If our group will be closed as indicated by the flag on signals,
-	     don't bother grabbing a signal.  */
-	  if (signals & 1)
-	    goto done;
+      // Now block.
+      struct _pthread_cleanup_buffer buffer;
+      struct _condvar_cleanup_buffer cbuffer;
+      cbuffer.wseq = wseq;
+      cbuffer.cond = cond;
+      cbuffer.mutex = mutex;
+      cbuffer.private = private;
+      __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
 
-	  /* If there is an available signal, don't block.  */
-	  if (signals != 0)
-	    break;
+      if (abstime == NULL)
+        {
+          /* Block without a timeout.  */
+          err = futex_wait_cancelable (
+        cond->__data.__g_signals + g, signals, private);
+        }
+      else
+        {
+          /* Block, but with a timeout.
+             Work around the fact that the kernel rejects negative timeout
+             values despite them being valid.  */
+          if (__glibc_unlikely (abstime->tv_sec < 0))
+            err = ETIMEDOUT;
 
-	  /* No signals available after spinning, so prepare to block.
-	     We first acquire a group reference and use acquire MO for that so
-	     that we synchronize with the dummy read-modify-write in
-	     __condvar_quiesce_and_switch_g1 if we read from that.  In turn,
-	     in this case this will make us see the closed flag on __g_signals
-	     that designates a concurrent attempt to reuse the group's slot.
-	     We use acquire MO for the __g_signals check to make the
-	     __g1_start check work (see spinning above).
-	     Note that the group reference acquisition will not mask the
-	     release MO when decrementing the reference count because we use
-	     an atomic read-modify-write operation and thus extend the release
-	     sequence.  */
-	  atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
-	  if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
-	      || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
-	    {
-	      /* Our group is closed.  Wake up any signalers that might be
-		 waiting.  */
-	      __condvar_dec_grefs (cond, g, private);
-	      goto done;
-	    }
+          else
+            {
+              err = futex_abstimed_wait_cancelable
+                  (cond->__data.__g_signals + g, signals, abstime,
+                  (flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0
+                                ? 0
+                                : FUTEX_CLOCK_REALTIME, private);
+            }
+        }
 
-	  // Now block.
-	  struct _pthread_cleanup_buffer buffer;
-	  struct _condvar_cleanup_buffer cbuffer;
-	  cbuffer.wseq = wseq;
-	  cbuffer.cond = cond;
-	  cbuffer.mutex = mutex;
-	  cbuffer.private = private;
-	  __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
+      __pthread_cleanup_pop (&buffer, 0);
 
-	  if (abstime == NULL)
-	    {
-	      /* Block without a timeout.  */
-	      err = futex_wait_cancelable (
-		  cond->__data.__g_signals + g, 0, private);
-	    }
-	  else
-	    {
-	      /* Block, but with a timeout.
-		 Work around the fact that the kernel rejects negative timeout
-		 values despite them being valid.  */
-	      if (__glibc_unlikely (abstime->tv_sec < 0))
-	        err = ETIMEDOUT;
-
-	      else
-		{
-		  err = futex_abstimed_wait_cancelable
-		      (cond->__data.__g_signals + g, 0, abstime,
-		      (flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0
-                        ? 0
-                        : FUTEX_CLOCK_REALTIME, private);
-		}
-	    }
-
-	  __pthread_cleanup_pop (&buffer, 0);
-
-	  if (__glibc_unlikely (err == ETIMEDOUT))
-	    {
-	      __condvar_dec_grefs (cond, g, private);
-	      /* If we timed out, we effectively cancel waiting.  Note that
-		 we have decremented __g_refs before cancellation, so that a
-		 deadlock between waiting for quiescence of our group in
-		 __condvar_quiesce_and_switch_g1 and us trying to acquire
-		 the lock during cancellation is not possible.  */
-	      __condvar_cancel_waiting (cond, seq, g, private);
-	      result = ETIMEDOUT;
-	      goto done;
-	    }
-	  else
-	    __condvar_dec_grefs (cond, g, private);
-
-	  /* Reload signals.  See above for MO.  */
-	  signals = atomic_load_acquire (cond->__data.__g_signals + g);
-	}
-
+      if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW))
+        {
+          /* On ETIMEDOUT or EOVERFLOW, we effectively cancel waiting.  */
+          __condvar_cancel_waiting (cond, seq, g, private);
+          result = err;
+          break;
+        }
     }
-  /* Try to grab a signal.  Use acquire MO so that we see an up-to-date value
-     of __g1_start below (see spinning above for a similar case).  In
-     particular, if we steal from a more recent group, we will also see a
-     more recent __g1_start below.  */
-  while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
-						&signals, signals - 2));
-
-  /* We consumed a signal but we could have consumed from a more recent group
-     that aliased with ours due to being in the same group slot.  If this
-     might be the case our group must be closed as visible through
-     __g1_start.  */
-  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
-  if (seq < (g1_start >> 1))
-    {
-      /* We potentially stole a signal from a more recent group but we do not
-	 know which group we really consumed from.
-	 We do not care about groups older than current G1 because they are
-	 closed; we could have stolen from these, but then we just add a
-	 spurious wake-up for the current groups.
-	 We will never steal a signal from current G2 that was really intended
-	 for G2 because G2 never receives signals (until it becomes G1).  We
-	 could have stolen a signal from G2 that was conservatively added by a
-	 previous waiter that also thought it stole a signal -- but given that
-	 that signal was added unnecessarily, it's not a problem if we steal
-	 it.
-	 Thus, the remaining case is that we could have stolen from the current
-	 G1, where "current" means the __g1_start value we observed.  However,
-	 if the current G1 does not have the same slot index as we do, we did
-	 not steal from it and do not need to undo that.  This is the reason
-	 for putting a bit with G2's index into__g1_start as well.  */
-      if (((g1_start & 1) ^ 1) == g)
-	{
-	  /* We have to conservatively undo our potential mistake of stealing
-	     a signal.  We can stop trying to do that when the current G1
-	     changes because other spinning waiters will notice this too and
-	     __condvar_quiesce_and_switch_g1 has checked that there are no
-	     futex waiters anymore before switching G1.
-	     Relaxed MO is fine for the __g1_start load because we need to
-	     merely be able to observe this fact and not have to observe
-	     something else as well.
-	     ??? Would it help to spin for a little while to see whether the
-	     current G1 gets closed?  This might be worthwhile if the group is
-	     small or close to being closed.  */
-	  unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
-	  while (__condvar_load_g1_start_relaxed (cond) == g1_start)
-	    {
-	      /* Try to add a signal.  We don't need to acquire the lock
-		 because at worst we can cause a spurious wake-up.  If the
-		 group is in the process of being closed (LSB is true), this
-		 has an effect similar to us adding a signal.  */
-	      if (((s & 1) != 0)
-		  || atomic_compare_exchange_weak_relaxed
-		       (cond->__data.__g_signals + g, &s, s + 2))
-		{
-		  /* If we added a signal, we also need to add a wake-up on
-		     the futex.  We also need to do that if we skipped adding
-		     a signal because the group is being closed because
-		     while __condvar_quiesce_and_switch_g1 could have closed
-		     the group, it might stil be waiting for futex waiters to
-		     leave (and one of those waiters might be the one we stole
-		     the signal from, which cause it to block using the
-		     futex).  */
-		  futex_wake (cond->__data.__g_signals + g, 1, private);
-		  break;
-		}
-	      /* TODO Back off.  */
-	    }
-	}
-    }
-
- done:
 
   /* Confirm that we have been woken.  We do that before acquiring the mutex
      to allow for execution of pthread_cond_destroy while having acquired the
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_mutex_trylock.c b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_mutex_trylock.c
index 7de61f4..68a2851 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_mutex_trylock.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/nptl/pthread_mutex_trylock.c
@@ -92,6 +92,9 @@
     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
+      /* We need to set op_pending before starting the operation.  Also
+	 see comments at ENQUEUE_MUTEX.  */
+      __asm ("" ::: "memory");
 
       oldval = mutex->__data.__lock;
       do
@@ -117,7 +120,12 @@
 	      /* But it is inconsistent unless marked otherwise.  */
 	      mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
 
+	      /* We must not enqueue the mutex before we have acquired it.
+		 Also see comments at ENQUEUE_MUTEX.  */
+	      __asm ("" ::: "memory");
 	      ENQUEUE_MUTEX (mutex);
+	      /* We need to clear op_pending after we enqueue the mutex.  */
+	      __asm ("" ::: "memory");
 	      THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 	      /* Note that we deliberately exist here.  If we fall
@@ -133,6 +141,8 @@
 	      int kind = PTHREAD_MUTEX_TYPE (mutex);
 	      if (kind == PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP)
 		{
+		  /* We do not need to ensure ordering wrt another memory
+		     access.  Also see comments at ENQUEUE_MUTEX. */
 		  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 				 NULL);
 		  return EDEADLK;
@@ -140,6 +150,8 @@
 
 	      if (kind == PTHREAD_MUTEX_ROBUST_RECURSIVE_NP)
 		{
+		  /* We do not need to ensure ordering wrt another memory
+		     access.  */
 		  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 				 NULL);
 
@@ -158,6 +170,9 @@
 							id, 0);
 	  if (oldval != 0 && (oldval & FUTEX_OWNER_DIED) == 0)
 	    {
+	      /* We haven't acquired the lock as it is already acquired by
+		 another owner.  We do not need to ensure ordering wrt another
+		 memory access.  */
 	      THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 	      return EBUSY;
@@ -171,13 +186,20 @@
 	      if (oldval == id)
 		lll_unlock (mutex->__data.__lock,
 			    PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
+	      /* FIXME This violates the mutex destruction requirements.  See
+		 __pthread_mutex_unlock_full.  */
 	      THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 	      return ENOTRECOVERABLE;
 	    }
 	}
       while ((oldval & FUTEX_OWNER_DIED) != 0);
 
+      /* We must not enqueue the mutex before we have acquired it.
+	 Also see comments at ENQUEUE_MUTEX.  */
+      __asm ("" ::: "memory");
       ENQUEUE_MUTEX (mutex);
+      /* We need to clear op_pending after we enqueue the mutex.  */
+      __asm ("" ::: "memory");
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
       mutex->__data.__owner = id;
@@ -203,10 +225,15 @@
 	int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
 
 	if (robust)
-	  /* Note: robust PI futexes are signaled by setting bit 0.  */
-	  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
-			 (void *) (((uintptr_t) &mutex->__data.__list.__next)
-				   | 1));
+	  {
+	    /* Note: robust PI futexes are signaled by setting bit 0.  */
+	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
+			   (void *) (((uintptr_t) &mutex->__data.__list.__next)
+				     | 1));
+	    /* We need to set op_pending before starting the operation.  Also
+	       see comments at ENQUEUE_MUTEX.  */
+	    __asm ("" ::: "memory");
+	  }
 
 	oldval = mutex->__data.__lock;
 
@@ -215,12 +242,16 @@
 	  {
 	    if (kind == PTHREAD_MUTEX_ERRORCHECK_NP)
 	      {
+		/* We do not need to ensure ordering wrt another memory
+		   access.  */
 		THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 		return EDEADLK;
 	      }
 
 	    if (kind == PTHREAD_MUTEX_RECURSIVE_NP)
 	      {
+		/* We do not need to ensure ordering wrt another memory
+		   access.  */
 		THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 		/* Just bump the counter.  */
@@ -242,6 +273,9 @@
 	  {
 	    if ((oldval & FUTEX_OWNER_DIED) == 0)
 	      {
+		/* We haven't acquired the lock as it is already acquired by
+		   another owner.  We do not need to ensure ordering wrt another
+		   memory access.  */
 		THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 		return EBUSY;
@@ -262,6 +296,9 @@
 	    if (INTERNAL_SYSCALL_ERROR_P (e, __err)
 		&& INTERNAL_SYSCALL_ERRNO (e, __err) == EWOULDBLOCK)
 	      {
+		/* The kernel has not yet finished the mutex owner death.
+		   We do not need to ensure ordering wrt another memory
+		   access.  */
 		THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 		return EBUSY;
@@ -279,7 +316,12 @@
 	    /* But it is inconsistent unless marked otherwise.  */
 	    mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
 
+	    /* We must not enqueue the mutex before we have acquired it.
+	       Also see comments at ENQUEUE_MUTEX.  */
+	    __asm ("" ::: "memory");
 	    ENQUEUE_MUTEX (mutex);
+	    /* We need to clear op_pending after we enqueue the mutex.  */
+	    __asm ("" ::: "memory");
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 	    /* Note that we deliberately exit here.  If we fall
@@ -302,13 +344,20 @@
 						  PTHREAD_ROBUST_MUTEX_PSHARED (mutex)),
 			      0, 0);
 
+	    /* To the kernel, this will be visible after the kernel has
+	       acquired the mutex in the syscall.  */
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 	    return ENOTRECOVERABLE;
 	  }
 
 	if (robust)
 	  {
+	    /* We must not enqueue the mutex before we have acquired it.
+	       Also see comments at ENQUEUE_MUTEX.  */
+	    __asm ("" ::: "memory");
 	    ENQUEUE_MUTEX_PI (mutex);
+	    /* We need to clear op_pending after we enqueue the mutex.  */
+	    __asm ("" ::: "memory");
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 	  }
 
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/posix/regexec.c b/google3/third_party/grte/v5_src/glibc-2.27/posix/regexec.c
index 63aef97..9edd9fb 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/posix/regexec.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/posix/regexec.c
@@ -1289,8 +1289,10 @@
 	      else if (naccepted)
 		{
 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
-		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
-			      naccepted) != 0)
+		  if (mctx->input.valid_len - *pidx < naccepted
+		      || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+				  naccepted)
+			  != 0))
 		    return -1;
 		}
 	    }
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/resolv/res_send.c b/google3/third_party/grte/v5_src/glibc-2.27/resolv/res_send.c
index 9e95417..ce63ea8 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/resolv/res_send.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/resolv/res_send.c
@@ -117,6 +117,13 @@
 #define MAXPACKET       65536
 #endif
 
+static inline void tag_socket(int fd) {
+#if defined(SO_NET_PROTOCOL_ID) && defined(PROTOCOL_ID_DNS)
+	int id = PROTOCOL_ID_DNS;
+	setsockopt(fd, SOL_SOCKET, SO_NET_PROTOCOL_ID, &id, sizeof(id));
+#endif
+}
+
 /* From ev_streams.c.  */
 
 static inline void
@@ -743,6 +750,7 @@
 			return (-1);
 		}
 		__set_errno (0);
+		tag_socket(statp->_vcsock);
 		if (connect(statp->_vcsock, nsap,
 			    nsap->sa_family == AF_INET
 			    ? sizeof (struct sockaddr_in)
@@ -945,6 +953,7 @@
 			*terrno = errno;
 			return (-1);
 		}
+		tag_socket(EXT(statp).nssocks[ns]);
 
 		/*
 		 * On a 4.3BSD+ machine (client and server,
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/libm-test-ulps b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/libm-test-ulps
index d9850c8..be06085 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/libm-test-ulps
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/libm-test-ulps
@@ -1940,7 +1940,9 @@
 ldouble: 1
 
 Function: "pow":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
 ildouble: 2
 ldouble: 2
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memcpy-memmove.S b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memcpy-memmove.S
new file mode 100644
index 0000000..7e1163e
--- /dev/null
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memcpy-memmove.S
@@ -0,0 +1,267 @@
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ *
+ */
+
+#define dstin	x0
+#define src	x1
+#define count	x2
+#define dst	x3
+#define srcend	x4
+#define dstend	x5
+#define A_l	x6
+#define A_lw	w6
+#define A_h	x7
+#define A_hw	w7
+#define B_l	x8
+#define B_lw	w8
+#define B_h	x9
+#define C_l	x10
+#define C_h	x11
+#define D_l	x12
+#define D_h	x13
+#define E_l	src
+#define E_h	count
+#define F_l	srcend
+#define F_h	dst
+#define G_l	count
+#define G_h	dst
+#define tmp1	x14
+
+/* Copies are split into 3 main cases: small copies of up to 16 bytes,
+   medium copies of 17..96 bytes which are fully unrolled. Large copies
+   of more than 96 bytes align the destination and use an unrolled loop
+   processing 64 bytes per iteration.
+   In order to share code with memmove, small and medium copies read all
+   data before writing, allowing any kind of overlap. So small, medium
+   and large backwards memmoves are handled by falling through into memcpy.
+   Overlapping large forward memmoves use a loop that copies backwards.
+*/
+
+#ifndef MEMMOVE
+# define MEMMOVE memmove
+#endif
+#ifndef MEMCPY
+# define MEMCPY memcpy
+#endif
+
+ENTRY_ALIGN (MEMMOVE, 6)
+
+	DELOUSE (0)
+	DELOUSE (1)
+	DELOUSE (2)
+
+	sub	tmp1, dstin, src
+	cmp	count, 96
+	ccmp	tmp1, count, 2, hi
+	b.lo	L(move_long)
+
+	/* Common case falls through into memcpy.  */
+END (MEMMOVE)
+libc_hidden_builtin_def (MEMMOVE)
+ENTRY (MEMCPY)
+
+	DELOUSE (0)
+	DELOUSE (1)
+	DELOUSE (2)
+
+	prfm	PLDL1KEEP, [src]
+	add	srcend, src, count
+	add	dstend, dstin, count
+	cmp	count, 16
+	b.ls	L(copy16)
+	cmp	count, 96
+	b.hi	L(copy_long)
+
+	/* Medium copies: 17..96 bytes.  */
+	sub	tmp1, count, 1
+	ldp	A_l, A_h, [src]
+	tbnz	tmp1, 6, L(copy96)
+	ldp	D_l, D_h, [srcend, -16]
+	tbz	tmp1, 5, 1f
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [srcend, -32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstend, -32]
+1:
+	stp	A_l, A_h, [dstin]
+	stp	D_l, D_h, [dstend, -16]
+	ret
+
+	.p2align 4
+	/* Small copies: 0..16 bytes.  */
+L(copy16):
+	cmp	count, 8
+	b.lo	1f
+	ldr	A_l, [src]
+	ldr	A_h, [srcend, -8]
+	str	A_l, [dstin]
+	str	A_h, [dstend, -8]
+	ret
+	.p2align 4
+1:
+	tbz	count, 2, 1f
+	ldr	A_lw, [src]
+	ldr	A_hw, [srcend, -4]
+	str	A_lw, [dstin]
+	str	A_hw, [dstend, -4]
+	ret
+
+	/* Copy 0..3 bytes.  Use a branchless sequence that copies the same
+	   byte 3 times if count==1, or the 2nd byte twice if count==2.  */
+1:
+	cbz	count, 2f
+	lsr	tmp1, count, 1
+	ldrb	A_lw, [src]
+	ldrb	A_hw, [srcend, -1]
+	ldrb	B_lw, [src, tmp1]
+	strb	A_lw, [dstin]
+	strb	B_lw, [dstin, tmp1]
+	strb	A_hw, [dstend, -1]
+2:	ret
+
+	.p2align 4
+	/* Copy 65..96 bytes.  Copy 64 bytes from the start and
+	   32 bytes from the end.  */
+L(copy96):
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [src, 32]
+	ldp	D_l, D_h, [src, 48]
+	ldp	E_l, E_h, [srcend, -32]
+	ldp	F_l, F_h, [srcend, -16]
+	stp	A_l, A_h, [dstin]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin, 32]
+	stp	D_l, D_h, [dstin, 48]
+	stp	E_l, E_h, [dstend, -32]
+	stp	F_l, F_h, [dstend, -16]
+	ret
+
+	/* Align DST to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.  There are at least 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.  The loop
+	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
+
+	.p2align 4
+L(copy_long):
+	and	tmp1, dstin, 15
+	bic	dst, dstin, 15
+	ldp	D_l, D_h, [src]
+	sub	src, src, tmp1
+	add	count, count, tmp1	/* Count is now 16 too large.  */
+	ldp	A_l, A_h, [src, 16]
+	stp	D_l, D_h, [dstin]
+	ldp	B_l, B_h, [src, 32]
+	ldp	C_l, C_h, [src, 48]
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 128 + 16	/* Test and readjust count.  */
+	b.ls	L(last64)
+L(loop64):
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [src, 16]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [src, 32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [src, 48]
+	stp	D_l, D_h, [dst, 64]!
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 64
+	b.hi	L(loop64)
+
+	/* Write the last full set of 64 bytes.  The remainder is at most 64
+	   bytes, so it is safe to always copy 64 bytes from the end even if
+	   there is just 1 byte left.  */
+L(last64):
+	ldp	E_l, E_h, [srcend, -64]
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [srcend, -48]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [srcend, -16]
+	stp	D_l, D_h, [dst, 64]
+	stp	E_l, E_h, [dstend, -64]
+	stp	A_l, A_h, [dstend, -48]
+	stp	B_l, B_h, [dstend, -32]
+	stp	C_l, C_h, [dstend, -16]
+	ret
+
+	.p2align 4
+L(move_long):
+	cbz	tmp1, 3f
+
+	add	srcend, src, count
+	add	dstend, dstin, count
+
+	/* Align dstend to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.  There are at least 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.  The loop
+	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
+
+	and	tmp1, dstend, 15
+	ldp	D_l, D_h, [srcend, -16]
+	sub	srcend, srcend, tmp1
+	sub	count, count, tmp1
+	ldp	A_l, A_h, [srcend, -16]
+	stp	D_l, D_h, [dstend, -16]
+	ldp	B_l, B_h, [srcend, -32]
+	ldp	C_l, C_h, [srcend, -48]
+	ldp	D_l, D_h, [srcend, -64]!
+	sub	dstend, dstend, tmp1
+	subs	count, count, 128
+	b.ls	2f
+
+	nop
+1:
+	stp	A_l, A_h, [dstend, -16]
+	ldp	A_l, A_h, [srcend, -16]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [srcend, -48]
+	stp	D_l, D_h, [dstend, -64]!
+	ldp	D_l, D_h, [srcend, -64]!
+	subs	count, count, 64
+	b.hi	1b
+
+	/* Write the last full set of 64 bytes.  The remainder is at most 64
+	   bytes, so it is safe to always copy 64 bytes from the start even if
+	   there is just 1 byte left.  */
+2:
+	ldp	G_l, G_h, [src, 48]
+	stp	A_l, A_h, [dstend, -16]
+	ldp	A_l, A_h, [src, 32]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [src, 16]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [src]
+	stp	D_l, D_h, [dstend, -64]
+	stp	G_l, G_h, [dstin, 48]
+	stp	A_l, A_h, [dstin, 32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin]
+3:	ret
+
+END (MEMCPY)
+libc_hidden_builtin_def (MEMCPY)
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memcpy.S b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memcpy.S
index 7e1163e..afd8d3f 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memcpy.S
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memcpy.S
@@ -18,250 +18,17 @@
 
 #include <sysdep.h>
 
-/* Assumptions:
+/* In the static build, do not define memmove in this object file.
  *
- * ARMv8-a, AArch64, unaligned accesses.
+ * glibc defines memmove and memcpy in the same file,
+ * which is problematic when overriding memcpy in static builds.
  *
- */
-
-#define dstin	x0
-#define src	x1
-#define count	x2
-#define dst	x3
-#define srcend	x4
-#define dstend	x5
-#define A_l	x6
-#define A_lw	w6
-#define A_h	x7
-#define A_hw	w7
-#define B_l	x8
-#define B_lw	w8
-#define B_h	x9
-#define C_l	x10
-#define C_h	x11
-#define D_l	x12
-#define D_h	x13
-#define E_l	src
-#define E_h	count
-#define F_l	srcend
-#define F_h	dst
-#define G_l	count
-#define G_h	dst
-#define tmp1	x14
-
-/* Copies are split into 3 main cases: small copies of up to 16 bytes,
-   medium copies of 17..96 bytes which are fully unrolled. Large copies
-   of more than 96 bytes align the destination and use an unrolled loop
-   processing 64 bytes per iteration.
-   In order to share code with memmove, small and medium copies read all
-   data before writing, allowing any kind of overlap. So small, medium
-   and large backwards memmoves are handled by falling through into memcpy.
-   Overlapping large forward memmoves use a loop that copies backwards.
-*/
-
-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
+ * This google-local patch is a workaround for the issue.
+ *
+ * This condition lets memmove and memcpy each get their own object file,
+ * which avoids multiple definition errors.  */
+#if !defined(SHARED)
+#define MEMMOVE __libc_internal_unused_memmove
 #endif
 
-ENTRY_ALIGN (MEMMOVE, 6)
-
-	DELOUSE (0)
-	DELOUSE (1)
-	DELOUSE (2)
-
-	sub	tmp1, dstin, src
-	cmp	count, 96
-	ccmp	tmp1, count, 2, hi
-	b.lo	L(move_long)
-
-	/* Common case falls through into memcpy.  */
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-ENTRY (MEMCPY)
-
-	DELOUSE (0)
-	DELOUSE (1)
-	DELOUSE (2)
-
-	prfm	PLDL1KEEP, [src]
-	add	srcend, src, count
-	add	dstend, dstin, count
-	cmp	count, 16
-	b.ls	L(copy16)
-	cmp	count, 96
-	b.hi	L(copy_long)
-
-	/* Medium copies: 17..96 bytes.  */
-	sub	tmp1, count, 1
-	ldp	A_l, A_h, [src]
-	tbnz	tmp1, 6, L(copy96)
-	ldp	D_l, D_h, [srcend, -16]
-	tbz	tmp1, 5, 1f
-	ldp	B_l, B_h, [src, 16]
-	ldp	C_l, C_h, [srcend, -32]
-	stp	B_l, B_h, [dstin, 16]
-	stp	C_l, C_h, [dstend, -32]
-1:
-	stp	A_l, A_h, [dstin]
-	stp	D_l, D_h, [dstend, -16]
-	ret
-
-	.p2align 4
-	/* Small copies: 0..16 bytes.  */
-L(copy16):
-	cmp	count, 8
-	b.lo	1f
-	ldr	A_l, [src]
-	ldr	A_h, [srcend, -8]
-	str	A_l, [dstin]
-	str	A_h, [dstend, -8]
-	ret
-	.p2align 4
-1:
-	tbz	count, 2, 1f
-	ldr	A_lw, [src]
-	ldr	A_hw, [srcend, -4]
-	str	A_lw, [dstin]
-	str	A_hw, [dstend, -4]
-	ret
-
-	/* Copy 0..3 bytes.  Use a branchless sequence that copies the same
-	   byte 3 times if count==1, or the 2nd byte twice if count==2.  */
-1:
-	cbz	count, 2f
-	lsr	tmp1, count, 1
-	ldrb	A_lw, [src]
-	ldrb	A_hw, [srcend, -1]
-	ldrb	B_lw, [src, tmp1]
-	strb	A_lw, [dstin]
-	strb	B_lw, [dstin, tmp1]
-	strb	A_hw, [dstend, -1]
-2:	ret
-
-	.p2align 4
-	/* Copy 64..96 bytes.  Copy 64 bytes from the start and
-	   32 bytes from the end.  */
-L(copy96):
-	ldp	B_l, B_h, [src, 16]
-	ldp	C_l, C_h, [src, 32]
-	ldp	D_l, D_h, [src, 48]
-	ldp	E_l, E_h, [srcend, -32]
-	ldp	F_l, F_h, [srcend, -16]
-	stp	A_l, A_h, [dstin]
-	stp	B_l, B_h, [dstin, 16]
-	stp	C_l, C_h, [dstin, 32]
-	stp	D_l, D_h, [dstin, 48]
-	stp	E_l, E_h, [dstend, -32]
-	stp	F_l, F_h, [dstend, -16]
-	ret
-
-	/* Align DST to 16 byte alignment so that we don't cross cache line
-	   boundaries on both loads and stores.  There are at least 96 bytes
-	   to copy, so copy 16 bytes unaligned and then align.  The loop
-	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
-
-	.p2align 4
-L(copy_long):
-	and	tmp1, dstin, 15
-	bic	dst, dstin, 15
-	ldp	D_l, D_h, [src]
-	sub	src, src, tmp1
-	add	count, count, tmp1	/* Count is now 16 too large.  */
-	ldp	A_l, A_h, [src, 16]
-	stp	D_l, D_h, [dstin]
-	ldp	B_l, B_h, [src, 32]
-	ldp	C_l, C_h, [src, 48]
-	ldp	D_l, D_h, [src, 64]!
-	subs	count, count, 128 + 16	/* Test and readjust count.  */
-	b.ls	L(last64)
-L(loop64):
-	stp	A_l, A_h, [dst, 16]
-	ldp	A_l, A_h, [src, 16]
-	stp	B_l, B_h, [dst, 32]
-	ldp	B_l, B_h, [src, 32]
-	stp	C_l, C_h, [dst, 48]
-	ldp	C_l, C_h, [src, 48]
-	stp	D_l, D_h, [dst, 64]!
-	ldp	D_l, D_h, [src, 64]!
-	subs	count, count, 64
-	b.hi	L(loop64)
-
-	/* Write the last full set of 64 bytes.  The remainder is at most 64
-	   bytes, so it is safe to always copy 64 bytes from the end even if
-	   there is just 1 byte left.  */
-L(last64):
-	ldp	E_l, E_h, [srcend, -64]
-	stp	A_l, A_h, [dst, 16]
-	ldp	A_l, A_h, [srcend, -48]
-	stp	B_l, B_h, [dst, 32]
-	ldp	B_l, B_h, [srcend, -32]
-	stp	C_l, C_h, [dst, 48]
-	ldp	C_l, C_h, [srcend, -16]
-	stp	D_l, D_h, [dst, 64]
-	stp	E_l, E_h, [dstend, -64]
-	stp	A_l, A_h, [dstend, -48]
-	stp	B_l, B_h, [dstend, -32]
-	stp	C_l, C_h, [dstend, -16]
-	ret
-
-	.p2align 4
-L(move_long):
-	cbz	tmp1, 3f
-
-	add	srcend, src, count
-	add	dstend, dstin, count
-
-	/* Align dstend to 16 byte alignment so that we don't cross cache line
-	   boundaries on both loads and stores.  There are at least 96 bytes
-	   to copy, so copy 16 bytes unaligned and then align.  The loop
-	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
-
-	and	tmp1, dstend, 15
-	ldp	D_l, D_h, [srcend, -16]
-	sub	srcend, srcend, tmp1
-	sub	count, count, tmp1
-	ldp	A_l, A_h, [srcend, -16]
-	stp	D_l, D_h, [dstend, -16]
-	ldp	B_l, B_h, [srcend, -32]
-	ldp	C_l, C_h, [srcend, -48]
-	ldp	D_l, D_h, [srcend, -64]!
-	sub	dstend, dstend, tmp1
-	subs	count, count, 128
-	b.ls	2f
-
-	nop
-1:
-	stp	A_l, A_h, [dstend, -16]
-	ldp	A_l, A_h, [srcend, -16]
-	stp	B_l, B_h, [dstend, -32]
-	ldp	B_l, B_h, [srcend, -32]
-	stp	C_l, C_h, [dstend, -48]
-	ldp	C_l, C_h, [srcend, -48]
-	stp	D_l, D_h, [dstend, -64]!
-	ldp	D_l, D_h, [srcend, -64]!
-	subs	count, count, 64
-	b.hi	1b
-
-	/* Write the last full set of 64 bytes.  The remainder is at most 64
-	   bytes, so it is safe to always copy 64 bytes from the start even if
-	   there is just 1 byte left.  */
-2:
-	ldp	G_l, G_h, [src, 48]
-	stp	A_l, A_h, [dstend, -16]
-	ldp	A_l, A_h, [src, 32]
-	stp	B_l, B_h, [dstend, -32]
-	ldp	B_l, B_h, [src, 16]
-	stp	C_l, C_h, [dstend, -48]
-	ldp	C_l, C_h, [src]
-	stp	D_l, D_h, [dstend, -64]
-	stp	G_l, G_h, [dstin, 48]
-	stp	A_l, A_h, [dstin, 32]
-	stp	B_l, B_h, [dstin, 16]
-	stp	C_l, C_h, [dstin]
-3:	ret
-
-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
+#include "./memcpy-memmove.S"
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memmove.S b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memmove.S
index 0feeac8..1a8c2e3 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memmove.S
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/aarch64/memmove.S
@@ -1 +1,33 @@
-/* memmove is part of memcpy.S.  */
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* In the static build, define memmove in this object file.
+ *
+ * glibc defines memmove and memcpy in the same file,
+ * which is problematic when overriding memcpy in static builds.
+ *
+ * This google-local patch is a workaround for the issue.
+ *
+ * This condition lets memmove and memcpy each get their own object file,
+ * which avoids multiple definition errors.  */
+#if !defined(SHARED)
+#define MEMCPY __libc_internal_unused_memcpy
+#include "./memcpy-memmove.S"
+#endif
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/generic/math_private.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/generic/math_private.h
index d5b900a..5c2d472 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/generic/math_private.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/generic/math_private.h
@@ -250,20 +250,18 @@
 
 
 /* Prototypes for functions of the IBM Accurate Mathematical Library.  */
-extern double __exp1 (double __x, double __xx, double __error);
+extern double __exp1 (double __x, double __xx);
 extern double __sin (double __x);
 extern double __cos (double __x);
 extern int __branred (double __x, double *__a, double *__aa);
 extern void __doasin (double __x, double __dx, double __v[]);
 extern void __dubsin (double __x, double __dx, double __v[]);
 extern void __dubcos (double __x, double __dx, double __v[]);
-extern double __halfulp (double __x, double __y);
 extern double __sin32 (double __x, double __res, double __res1);
 extern double __cos32 (double __x, double __res, double __res1);
 extern double __mpsin (double __x, double __dx, bool __range_reduce);
 extern double __mpcos (double __x, double __dx, bool __range_reduce);
 extern double __slowexp (double __x);
-extern double __slowpow (double __x, double __y, double __z);
 extern void __docos (double __x, double __dx, double __v[]);
 
 #ifndef math_opt_barrier
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/i386/fpu/halfulp.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/i386/fpu/halfulp.c
deleted file mode 100644
index 1cc8931..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/i386/fpu/halfulp.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/i386/fpu/slowpow.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/i386/fpu/slowpow.c
deleted file mode 100644
index 1cc8931..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/i386/fpu/slowpow.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ia64/fpu/halfulp.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ia64/fpu/halfulp.c
deleted file mode 100644
index 1cc8931..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ia64/fpu/halfulp.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ia64/fpu/slowpow.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ia64/fpu/slowpow.c
deleted file mode 100644
index 1cc8931..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ia64/fpu/slowpow.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/e_exp.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/e_exp.c
index 3d2560c..7a9daa5 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/e_exp.c
@@ -233,13 +233,10 @@
 strong_alias (__ieee754_exp, __exp_finite)
 #endif
 
-/* Compute e^(x+xx).  The routine also receives bound of error of previous
-   calculation.  If after computing exp the error exceeds the allowed bounds,
-   the routine returns a non-positive number.  Otherwise it returns the
-   computed result, which is always positive.  */
+/* Compute e^(x+xx).  */
 double
 SECTION
-__exp1 (double x, double xx, double error)
+__exp1 (double x, double xx)
 {
   double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
   mynumber junk1, junk2, binexp = {{0, 0}};
@@ -249,6 +246,7 @@
   m = junk1.i[HIGH_HALF];
   n = m & hugeint;		/* no sign */
 
+  /* fabs (x) > 5.551112e-17 and fabs (x) < 7.080010e+02.  */
   if (n > smallint && n < bigint)
     {
       y = x * log2e.x + three51.x;
@@ -276,11 +274,9 @@
 
       rem = (bet + bet * eps) + al * eps;
       res = al + rem;
-      cor = (al - res) + rem;
-      if (res == (res + cor * (1.0 + error + err_1)))
-	return res * binexp.x;
-      else
-	return -10.0;
+      /* Maximum relative error before rounding is 8.8e-22 (69.9 bits).
+	 Maximum ULP error is 0.500008.  */
+      return res * binexp.x;
     }
 
   if (n <= smallint)
@@ -318,6 +314,7 @@
   cor = (al - res) + rem;
   if (m >> 31)
     {
+      /* x < 0.  */
       ex = junk1.i[LOW_HALF];
       if (res < 1.0)
 	{
@@ -328,34 +325,25 @@
       if (ex >= -1022)
 	{
 	  binexp.i[HIGH_HALF] = (1023 + ex) << 20;
-	  if (res == (res + cor * (1.0 + error + err_1)))
-	    return res * binexp.x;
-	  else
-	    return -10.0;
+	  /* Maximum ULP error is 0.500008.  */
+	  return res * binexp.x;
 	}
+      /* Denormal case - ex < -1022.  */
       ex = -(1022 + ex);
       binexp.i[HIGH_HALF] = (1023 - ex) << 20;
       res *= binexp.x;
       cor *= binexp.x;
-      eps = 1.00000000001 + (error + err_1) * binexp.x;
       t = 1.0 + res;
       y = ((1.0 - t) + res) + cor;
       res = t + y;
-      cor = (t - res) + y;
-      if (res == (res + eps * cor))
-	{
-	  binexp.i[HIGH_HALF] = 0x00100000;
-	  return (res - 1.0) * binexp.x;
-	}
-      else
-	return -10.0;
+      binexp.i[HIGH_HALF] = 0x00100000;
+      /* Maximum ULP error is 0.500004.  */
+      return (res - 1.0) * binexp.x;
     }
   else
     {
       binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
-      if (res == (res + cor * (1.0 + error + err_1)))
-	return res * binexp.x * t256.x;
-      else
-	return -10.0;
+      /* Maximum ULP error is 0.500008.  */
+      return res * binexp.x * t256.x;
     }
 }
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/e_pow.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/e_pow.c
index f6e5fcd..542d03a 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/e_pow.c
@@ -20,13 +20,9 @@
 /*  MODULE_NAME: upow.c                                                    */
 /*                                                                         */
 /*  FUNCTIONS: upow                                                        */
-/*             power1                                                      */
-/*             my_log2                                                     */
 /*             log1                                                        */
 /*             checkint                                                    */
 /* FILES NEEDED: dla.h endian.h mpa.h mydefs.h                             */
-/*               halfulp.c mpexp.c mplog.c slowexp.c slowpow.c mpa.c       */
-/*                          uexp.c  upow.c				   */
 /*               root.tbl uexp.tbl upow.tbl                                */
 /* An ultimate power routine. Given two IEEE double machine numbers y,x    */
 /* it computes the correctly rounded (to nearest) value of x^y.            */
@@ -50,11 +46,8 @@
 
 static const double huge = 1.0e300, tiny = 1.0e-300;
 
-double __exp1 (double x, double xx, double error);
-static double log1 (double x, double *delta, double *error);
-static double my_log2 (double x, double *delta, double *error);
-double __slowpow (double x, double y, double z);
-static double power1 (double x, double y);
+double __exp1 (double x, double xx);
+static double log1 (double x, double *delta);
 static int checkint (double x);
 
 /* An ultimate power routine. Given two IEEE double machine numbers y, x it
@@ -63,7 +56,7 @@
 SECTION
 __ieee754_pow (double x, double y)
 {
-  double z, a, aa, error, t, a1, a2, y1, y2;
+  double z, a, aa, t, a1, a2, y1, y2;
   mynumber u, v;
   int k;
   int4 qx, qy;
@@ -100,7 +93,7 @@
 	   not matter if |y| <= 2**-64.  */
 	if (fabs (y) < 0x1p-64)
 	  y = y < 0 ? -0x1p-64 : 0x1p-64;
-	z = log1 (x, &aa, &error);	/* x^y  =e^(y log (X)) */
+	z = log1 (x, &aa);	/* x^y  =e^(y log (X)) */
 	t = y * CN;
 	y1 = t - (t - y);
 	y2 = y - y1;
@@ -111,9 +104,16 @@
 	aa = y2 * a1 + y * a2;
 	a1 = a + aa;
 	a2 = (a - a1) + aa;
-	error = error * fabs (y);
-	t = __exp1 (a1, a2, 1.9e16 * error);	/* return -10 or 0 if wasn't computed exactly */
-	retval = (t > 0) ? t : power1 (x, y);
+
+	/* Maximum relative error RElog of log1 is 1.0e-21 (69.7 bits).
+	   Maximum relative error REexp of __exp1 is 8.8e-22 (69.9 bits).
+	   We actually compute exp ((1 + RElog) * log (x) * y) * (1 + REexp).
+	   Since RElog/REexp are tiny and log (x) * y is at most log (DBL_MAX),
+	   this is equivalent to pow (x, y) * (1 + 710 * RElog + REexp).
+	   So the relative error is 710 * 1.0e-21 + 8.8e-22 = 7.1e-19
+	   (60.2 bits).  The worst-case ULP error is 0.5064.  */
+
+	retval = __exp1 (a1, a2);
       }
 
       if (isinf (retval))
@@ -218,33 +218,11 @@
 strong_alias (__ieee754_pow, __pow_finite)
 #endif
 
-/* Compute x^y using more accurate but more slow log routine.  */
-static double
-SECTION
-power1 (double x, double y)
-{
-  double z, a, aa, error, t, a1, a2, y1, y2;
-  z = my_log2 (x, &aa, &error);
-  t = y * CN;
-  y1 = t - (t - y);
-  y2 = y - y1;
-  t = z * CN;
-  a1 = t - (t - z);
-  a2 = z - a1;
-  a = y * z;
-  aa = ((y1 * a1 - a) + y1 * a2 + y2 * a1) + y2 * a2 + aa * y;
-  a1 = a + aa;
-  a2 = (a - a1) + aa;
-  error = error * fabs (y);
-  t = __exp1 (a1, a2, 1.9e16 * error);
-  return (t >= 0) ? t : __slowpow (x, y, z);
-}
-
 /* Compute log(x) (x is left argument). The result is the returned double + the
-   parameter DELTA.  The result is bounded by ERROR.  */
+   parameter DELTA.  */
 static double
 SECTION
-log1 (double x, double *delta, double *error)
+log1 (double x, double *delta)
 {
   unsigned int i, j;
   int m;
@@ -260,9 +238,7 @@
 
   u.x = x;
   m = u.i[HIGH_HALF];
-  *error = 0;
-  *delta = 0;
-  if (m < 0x00100000)		/*  1<x<2^-1007 */
+  if (m < 0x00100000)		/* Handle denormal x.  */
     {
       x = x * t52.x;
       add = -52.0;
@@ -284,7 +260,7 @@
   v.x = u.x + bigu.x;
   uu = v.x - bigu.x;
   i = (v.i[LOW_HALF] & 0x000003ff) << 2;
-  if (two52.i[LOW_HALF] == 1023)	/* nx = 0              */
+  if (two52.i[LOW_HALF] == 1023)	/* Exponent of x is 0.  */
     {
       if (i > 1192 && i < 1208)	/* |x-1| < 1.5*2**-10  */
 	{
@@ -296,8 +272,8 @@
 							   * (r7 + t * r8)))))
 		- 0.5 * t2 * (t + t1));
 	  res = e1 + e2;
-	  *error = 1.0e-21 * fabs (t);
 	  *delta = (e1 - res) + e2;
+	  /* Max relative error is 1.464844e-24, so accurate to 79.1 bits.  */
 	  return res;
 	}			/* |x-1| < 1.5*2**-10  */
       else
@@ -316,12 +292,12 @@
 	  t2 = ((((t - t1) + e) + (ui.x[i + 3] + vj.x[j + 2])) + e2 + e * e
 		* (p2 + e * (p3 + e * p4)));
 	  res = t1 + t2;
-	  *error = 1.0e-24;
 	  *delta = (t1 - res) + t2;
+	  /* Max relative error is 1.0e-24, so accurate to 79.7 bits.  */
 	  return res;
 	}
-    }				/* nx = 0 */
-  else				/* nx != 0   */
+    }
+  else				/* Exponent of x != 0.  */
     {
       eps = u.x - uu;
       nx = (two52.x - two52e.x) + add;
@@ -334,113 +310,13 @@
       t2 = ((((t - t1) + e) + nx * ln2b.x + ui.x[i + 3] + e2) + e * e
 	    * (q2 + e * (q3 + e * (q4 + e * (q5 + e * q6)))));
       res = t1 + t2;
-      *error = 1.0e-21;
       *delta = (t1 - res) + t2;
-      return res;
-    }				/* nx != 0   */
-}
-
-/* Slower but more accurate routine of log.  The returned result is double +
-   DELTA.  The result is bounded by ERROR.  */
-static double
-SECTION
-my_log2 (double x, double *delta, double *error)
-{
-  unsigned int i, j;
-  int m;
-  double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
-  double ou1, ou2, lu1, lu2, ov, lv1, lv2, a, a1, a2;
-  double y, yy, z, zz, j1, j2, j7, j8;
-#ifndef DLA_FMS
-  double j3, j4, j5, j6;
-#endif
-  mynumber u, v;
-#ifdef BIG_ENDI
-  mynumber /**/ two52 = {{0x43300000, 0x00000000}};	/* 2**52  */
-#else
-# ifdef LITTLE_ENDI
-  mynumber /**/ two52 = {{0x00000000, 0x43300000}};	/* 2**52  */
-# endif
-#endif
-
-  u.x = x;
-  m = u.i[HIGH_HALF];
-  *error = 0;
-  *delta = 0;
-  add = 0;
-  if (m < 0x00100000)
-    {				/* x < 2^-1022 */
-      x = x * t52.x;
-      add = -52.0;
-      u.x = x;
-      m = u.i[HIGH_HALF];
-    }
-
-  if ((m & 0x000fffff) < 0x0006a09e)
-    {
-      u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
-      two52.i[LOW_HALF] = (m >> 20);
-    }
-  else
-    {
-      u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
-      two52.i[LOW_HALF] = (m >> 20) + 1;
-    }
-
-  v.x = u.x + bigu.x;
-  uu = v.x - bigu.x;
-  i = (v.i[LOW_HALF] & 0x000003ff) << 2;
-  /*------------------------------------- |x-1| < 2**-11-------------------------------  */
-  if ((two52.i[LOW_HALF] == 1023) && (i == 1200))
-    {
-      t = x - 1.0;
-      EMULV (t, s3, y, yy, j1, j2, j3, j4, j5);
-      ADD2 (-0.5, 0, y, yy, z, zz, j1, j2);
-      MUL2 (t, 0, z, zz, y, yy, j1, j2, j3, j4, j5, j6, j7, j8);
-      MUL2 (t, 0, y, yy, z, zz, j1, j2, j3, j4, j5, j6, j7, j8);
-
-      e1 = t + z;
-      e2 = ((((t - e1) + z) + zz) + t * t * t
-	    * (ss3 + t * (s4 + t * (s5 + t * (s6 + t * (s7 + t * s8))))));
-      res = e1 + e2;
-      *error = 1.0e-25 * fabs (t);
-      *delta = (e1 - res) + e2;
-      return res;
-    }
-  /*----------------------------- |x-1| > 2**-11  --------------------------  */
-  else
-    {				/*Computing log(x) according to log table                        */
-      nx = (two52.x - two52e.x) + add;
-      ou1 = ui.x[i];
-      ou2 = ui.x[i + 1];
-      lu1 = ui.x[i + 2];
-      lu2 = ui.x[i + 3];
-      v.x = u.x * (ou1 + ou2) + bigv.x;
-      vv = v.x - bigv.x;
-      j = v.i[LOW_HALF] & 0x0007ffff;
-      j = j + j + j;
-      eps = u.x - uu * vv;
-      ov = vj.x[j];
-      lv1 = vj.x[j + 1];
-      lv2 = vj.x[j + 2];
-      a = (ou1 + ou2) * (1.0 + ov);
-      a1 = (a + 1.0e10) - 1.0e10;
-      a2 = a * (1.0 - a1 * uu * vv);
-      e1 = eps * a1;
-      e2 = eps * a2;
-      e = e1 + e2;
-      e2 = (e1 - e) + e2;
-      t = nx * ln2a.x + lu1 + lv1;
-      t1 = t + e;
-      t2 = ((((t - t1) + e) + (lu2 + lv2 + nx * ln2b.x + e2)) + e * e
-	    * (p2 + e * (p3 + e * p4)));
-      res = t1 + t2;
-      *error = 1.0e-27;
-      *delta = (t1 - res) + t2;
+      /* Max relative error is 1.0e-21, so accurate to 69.7 bits.  */
       return res;
     }
 }
 
+
 /* This function receives a double x and checks if it is an integer.  If not,
    it returns 0, else it returns 1 if even or -1 if odd.  */
 static int
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/halfulp.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/halfulp.c
deleted file mode 100644
index 0768d86..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/halfulp.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2018 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/************************************************************************/
-/*                                                                      */
-/* MODULE_NAME:halfulp.c                                                */
-/*                                                                      */
-/*  FUNCTIONS:halfulp                                                   */
-/*  FILES NEEDED: mydefs.h dla.h endian.h                               */
-/*                uroot.c                                               */
-/*                                                                      */
-/*Routine halfulp(double x, double y) computes x^y where result does    */
-/*not need rounding. If the result is closer to 0 than can be           */
-/*represented it returns 0.                                             */
-/*     In the following cases the function does not compute anything    */
-/*and returns a negative number:                                        */
-/*1. if the result needs rounding,                                      */
-/*2. if y is outside the interval [0,  2^20-1],                         */
-/*3. if x can be represented by  x=2**n for some integer n.             */
-/************************************************************************/
-
-#include "endian.h"
-#include "mydefs.h"
-#include <dla.h>
-#include <math_private.h>
-
-#ifndef SECTION
-# define SECTION
-#endif
-
-static const int4 tab54[32] = {
-  262143, 11585,  1782, 511, 210, 107, 63, 42,
-  30,     22,     17,   14,  12,  10,  9, 7,
-  7,      6,      5,    5,   5,   4,   4, 4,
-  3,      3,      3,    3,   3,   3,   3, 3
-};
-
-
-double
-SECTION
-__halfulp (double x, double y)
-{
-  mynumber v;
-  double z, u, uu;
-#ifndef DLA_FMS
-  double j1, j2, j3, j4, j5;
-#endif
-  int4 k, l, m, n;
-  if (y <= 0)                 /*if power is negative or zero */
-    {
-      v.x = y;
-      if (v.i[LOW_HALF] != 0)
-	return -10.0;
-      v.x = x;
-      if (v.i[LOW_HALF] != 0)
-	return -10.0;
-      if ((v.i[HIGH_HALF] & 0x000fffff) != 0)
-	return -10;                                     /* if x =2 ^ n */
-      k = ((v.i[HIGH_HALF] & 0x7fffffff) >> 20) - 1023; /* find this n */
-      z = (double) k;
-      return (z * y == -1075.0) ? 0 : -10.0;
-    }
-  /* if y > 0  */
-  v.x = y;
-  if (v.i[LOW_HALF] != 0)
-    return -10.0;
-
-  v.x = x;
-  /*  case where x = 2**n for some integer n */
-  if (((v.i[HIGH_HALF] & 0x000fffff) | v.i[LOW_HALF]) == 0)
-    {
-      k = (v.i[HIGH_HALF] >> 20) - 1023;
-      return (((double) k) * y == -1075.0) ? 0 : -10.0;
-    }
-
-  v.x = y;
-  k = v.i[HIGH_HALF];
-  m = k << 12;
-  l = 0;
-  while (m)
-    {
-      m = m << 1; l++;
-    }
-  n = (k & 0x000fffff) | 0x00100000;
-  n = n >> (20 - l);                       /*   n is the odd integer of y    */
-  k = ((k >> 20) - 1023) - l;               /*   y = n*2**k                   */
-  if (k > 5)
-    return -10.0;
-  if (k > 0)
-    for (; k > 0; k--)
-      n *= 2;
-  if (n > 34)
-    return -10.0;
-  k = -k;
-  if (k > 5)
-    return -10.0;
-
-  /*   now treat x        */
-  while (k > 0)
-    {
-      z = __ieee754_sqrt (x);
-      EMULV (z, z, u, uu, j1, j2, j3, j4, j5);
-      if (((u - x) + uu) != 0)
-	break;
-      x = z;
-      k--;
-    }
-  if (k)
-    return -10.0;
-
-  /* it is impossible that n == 2,  so the mantissa of x must be short  */
-
-  v.x = x;
-  if (v.i[LOW_HALF])
-    return -10.0;
-  k = v.i[HIGH_HALF];
-  m = k << 12;
-  l = 0;
-  while (m)
-    {
-      m = m << 1; l++;
-    }
-  m = (k & 0x000fffff) | 0x00100000;
-  m = m >> (20 - l);                       /*   m is the odd integer of x    */
-
-  /*   now check whether the length of m**n is at most 54 bits */
-
-  if (m > tab54[n - 3])
-    return -10.0;
-
-  /* yes, it is - now compute x**n by simple multiplications  */
-
-  u = x;
-  for (k = 1; k < n; k++)
-    u = u * x;
-  return u;
-}
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/slowpow.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/slowpow.c
deleted file mode 100644
index d7c7fb3..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/slowpow.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2018 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/*************************************************************************/
-/* MODULE_NAME:slowpow.c                                                 */
-/*                                                                       */
-/* FUNCTION:slowpow                                                      */
-/*                                                                       */
-/*FILES NEEDED:mpa.h                                                     */
-/*             mpa.c mpexp.c mplog.c halfulp.c                           */
-/*                                                                       */
-/* Given two IEEE double machine numbers y,x , routine  computes the     */
-/* correctly  rounded (to nearest) value of x^y. Result calculated  by   */
-/* multiplication (in halfulp.c) or if result isn't accurate enough      */
-/* then routine converts x and y into multi-precision doubles     and    */
-/* calls to mpexp routine                                                */
-/*************************************************************************/
-
-#include "mpa.h"
-#include <math_private.h>
-
-#include <stap-probe.h>
-
-#ifndef SECTION
-# define SECTION
-#endif
-
-void __mpexp (mp_no *x, mp_no *y, int p);
-void __mplog (mp_no *x, mp_no *y, int p);
-double ulog (double);
-double __halfulp (double x, double y);
-
-double
-SECTION
-__slowpow (double x, double y, double z)
-{
-  double res, res1;
-  mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
-  static const mp_no eps = {-3, {1.0, 4.0}};
-  int p;
-
-  /* __HALFULP returns -10 or X^Y.  */
-  res = __halfulp (x, y);
-
-  /* Return if the result was computed by __HALFULP.  */
-  if (res >= 0)
-    return res;
-
-  /* Compute pow as long double.  This is currently only used by powerpc, where
-     one may get 106 bits of accuracy.  */
-#ifdef USE_LONG_DOUBLE_FOR_MP
-  long double ldw, ldz, ldpp;
-  static const long double ldeps = 0x4.0p-96;
-
-  ldz = __ieee754_logl ((long double) x);
-  ldw = (long double) y *ldz;
-  ldpp = __ieee754_expl (ldw);
-  res = (double) (ldpp + ldeps);
-  res1 = (double) (ldpp - ldeps);
-
-  /* Return the result if it is accurate enough.  */
-  if (res == res1)
-    return res;
-#endif
-
-  /* Or else, calculate using multiple precision.  P = 10 implies accuracy of
-     240 bits accuracy, since MP_NO has a radix of 2^24.  */
-  p = 10;
-  __dbl_mp (x, &mpx, p);
-  __dbl_mp (y, &mpy, p);
-  __dbl_mp (z, &mpz, p);
-
-  /* z = x ^ y
-     log (z) = y * log (x)
-     z = exp (y * log (x))  */
-  __mplog (&mpx, &mpz, p);
-  __mul (&mpy, &mpz, &mpw, p);
-  __mpexp (&mpw, &mpp, p);
-
-  /* Add and subtract EPS to ensure that the result remains unchanged, i.e. we
-     have last bit accuracy.  */
-  __add (&mpp, &eps, &mpr, p);
-  __mp_dbl (&mpr, &res, p);
-  __sub (&mpp, &eps, &mpr1, p);
-  __mp_dbl (&mpr1, &res1, p);
-  if (res == res1)
-    {
-      /* Track how often we get to the slow pow code plus
-	 its input/output values.  */
-      LIBC_PROBE (slowpow_p10, 4, &x, &y, &z, &res);
-      return res;
-    }
-
-  /* If we don't, then we repeat using a higher precision.  768 bits of
-     precision ought to be enough for anybody.  */
-  p = 32;
-  __dbl_mp (x, &mpx, p);
-  __dbl_mp (y, &mpy, p);
-  __dbl_mp (z, &mpz, p);
-  __mplog (&mpx, &mpz, p);
-  __mul (&mpy, &mpz, &mpw, p);
-  __mpexp (&mpw, &mpp, p);
-  __mp_dbl (&mpp, &res, p);
-
-  /* Track how often we get to the uber-slow pow code plus
-     its input/output values.  */
-  LIBC_PROBE (slowpow_p32, 4, &x, &y, &z, &res);
-
-  return res;
-}
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/uexp.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/uexp.h
index a8a023e..f4483d5 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/uexp.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/ieee754/dbl-64/uexp.h
@@ -30,7 +30,7 @@
 #include "mydefs.h"
 
 const static double zero = 0.0, hhuge = 1.0e300, tiny = 1.0e-300,
-err_0 = 1.000014, err_1 = 0.000016;
+                    err_0 = 1.000014;
 const static int4 bigint = 0x40862002,
              badint = 0x40876000,smallint = 0x3C8fffff;
 const static int4 hugeint = 0x7FFFFFFF, infint = 0x7ff00000;
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/m68k/m680x0/fpu/halfulp.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/m68k/m680x0/fpu/halfulp.c
deleted file mode 100644
index 1cc8931..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/m68k/m680x0/fpu/halfulp.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/m68k/m680x0/fpu/slowpow.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/m68k/m680x0/fpu/slowpow.c
deleted file mode 100644
index 1cc8931..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/m68k/m680x0/fpu/slowpow.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/powerpc/power4/fpu/Makefile b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/powerpc/power4/fpu/Makefile
index e17d32f..fa1b070 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/powerpc/power4/fpu/Makefile
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/powerpc/power4/fpu/Makefile
@@ -2,6 +2,5 @@
 
 ifeq ($(subdir),math)
 CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops
-CPPFLAGS-slowpow.c += -DUSE_LONG_DOUBLE_FOR_MP=1
 CPPFLAGS-slowexp.c += -DUSE_LONG_DOUBLE_FOR_MP=1
 endif
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/aarch64/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/aarch64/bits/mman.h
index 83ff48c..d87e33f 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/aarch64/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/aarch64/bits/mman.h
@@ -36,6 +36,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/alpha/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/alpha/bits/mman.h
index 935804b..3abfd84 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/alpha/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/alpha/bits/mman.h
@@ -36,6 +36,8 @@
 # define MAP_NONBLOCK	  0x40000	/* Do not block on IO.  */
 # define MAP_STACK	  0x80000	/* Allocation is for a stack.  */
 # define MAP_HUGETLB	  0x100000	/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x200000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Flags for `mlockall'.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/arm/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/arm/bits/mman.h
index 9e059e6..81d0ad3 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/arm/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/arm/bits/mman.h
@@ -34,6 +34,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Include generic Linux declarations.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/bits/mman-linux.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/bits/mman-linux.h
index e61212a..70edeb6 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/bits/mman-linux.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/bits/mman-linux.h
@@ -42,6 +42,8 @@
 #define MAP_SHARED	0x01		/* Share changes.  */
 #define MAP_PRIVATE	0x02		/* Changes are private.  */
 #ifdef __USE_MISC
+# define MAP_SHARED_VALIDATE	0x03	/* Share changes and validate
+					   extension flags.  */
 # define MAP_TYPE	0x0f		/* Mask for type of mapping.  */
 #endif
 
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/hppa/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/hppa/bits/mman.h
index 84ba0d6..1d2a175 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/hppa/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/hppa/bits/mman.h
@@ -34,6 +34,8 @@
 #define MAP_SHARED	0x01		/* Share changes */
 #define MAP_PRIVATE	0x02		/* Changes are private */
 #ifdef __USE_MISC
+# define MAP_SHARED_VALIDATE	0x03	/* Share changes and validate
+					   extension flags.  */
 # define MAP_TYPE	0x03		/* Mask for type of mapping */
 #endif
 
@@ -60,6 +62,8 @@
 # define MAP_NONBLOCK	0x20000		/* Do not block on IO */
 # define MAP_STACK	0x40000		/* Create for process/thread stacks */
 # define MAP_HUGETLB	0x80000		/* Create a huge page mapping */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Flags to "msync"  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/ia64/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/ia64/bits/mman.h
index cb64de4..3f984be 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/ia64/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/ia64/bits/mman.h
@@ -35,6 +35,8 @@
 # define MAP_NONBLOCK	  0x10000	/* Do not block on IO.  */
 # define MAP_STACK	  0x20000	/* Allocation is for a stack.  */
 # define MAP_HUGETLB	  0x40000	/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Include generic Linux declarations.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/m68k/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/m68k/bits/mman.h
index db26a44..3829917 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/m68k/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/m68k/bits/mman.h
@@ -34,6 +34,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Include generic Linux declarations.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/microblaze/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/microblaze/bits/mman.h
index b820da1..111f789 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/microblaze/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/microblaze/bits/mman.h
@@ -36,6 +36,8 @@
 # define MAP_NONBLOCK       0x10000     /* Do not block on IO.  */
 # define MAP_STACK          0x20000     /* Allocation is for a stack.  */
 # define MAP_HUGETLB        0x40000     /* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Include generic Linux declarations.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/mips/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/mips/bits/mman.h
index ed8287f..102bc6d 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/mips/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/mips/bits/mman.h
@@ -34,6 +34,8 @@
 # define MAP_NONBLOCK   0x20000         /* do not block on IO */
 # define MAP_STACK	0x40000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x80000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 #define __MAP_ANONYMOUS 0x0800
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/nios2/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/nios2/bits/mman.h
index b7c27b8..1ecb18e 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/nios2/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/nios2/bits/mman.h
@@ -36,6 +36,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Include generic Linux declarations.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/powerpc/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/powerpc/bits/mman.h
index b8a01fa..e652467 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/powerpc/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/powerpc/bits/mman.h
@@ -36,6 +36,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Flags for `mlockall'.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/riscv/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/riscv/bits/mman.h
index 0e64f1e..06d28d6 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/riscv/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/riscv/bits/mman.h
@@ -30,6 +30,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Include generic Linux declarations.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/s390/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/s390/bits/mman.h
index 4b926e8..6dd9476 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/s390/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/s390/bits/mman.h
@@ -35,6 +35,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Include generic Linux declarations.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/sh/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/sh/bits/mman.h
index c5844dc..4701e0e 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/sh/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/sh/bits/mman.h
@@ -34,6 +34,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Include generic Linux declarations.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/sparc/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/sparc/bits/mman.h
index f0a4797..3a3ffb9 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/sparc/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/sparc/bits/mman.h
@@ -36,6 +36,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Flags for `mlockall'.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/test-errno-linux.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/test-errno-linux.c
index be11353..3107254 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/test-errno-linux.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/test-errno-linux.c
@@ -161,7 +161,7 @@
   /* quotactl returns ENOSYS for kernels not configured with
      CONFIG_QUOTA, and may return EPERM if called within certain types
      of containers.  */
-  fails |= test_wrp2 (LIST (ENODEV, ENOSYS, EPERM),
+  fails |= test_wrp2 (LIST (EINVAL, ENODEV, ENOSYS, EPERM),
 		      quotactl, Q_GETINFO, NULL, -1, (caddr_t) &dqblk);
   fails |= test_wrp (EINVAL, sched_getparam, -1, &sch_param);
   fails |= test_wrp (EINVAL, sched_getscheduler, -1);
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/x86/bits/mman.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/x86/bits/mman.h
index fb4737a..3856748 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/x86/bits/mman.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/unix/sysv/linux/x86/bits/mman.h
@@ -39,6 +39,8 @@
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
 # define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
+# define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED but do not unmap
+					   underlying mapping.  */
 #endif
 
 /* Include generic Linux declarations.  */
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86/bits/floatn.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86/bits/floatn.h
index 7387bba..8f01087 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86/bits/floatn.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86/bits/floatn.h
@@ -33,7 +33,7 @@
 #if (defined __x86_64__							\
      ? __GNUC_PREREQ (4, 3)						\
      : (defined __GNU__ ? __GNUC_PREREQ (4, 5) : __GNUC_PREREQ (4, 4))) \
-    || defined __clang__
+    || (defined __clang__ && defined __FLOAT128__ && !defined(__CUDA__))
 # define __HAVE_FLOAT128 1
 #else
 # define __HAVE_FLOAT128 0
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/libm-test-ulps b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/libm-test-ulps
index f34e91a..bbb8a4d 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/libm-test-ulps
@@ -2470,8 +2470,10 @@
 float: 2
 
 Function: "pow":
+double: 1
 float: 1
 float128: 2
+idouble: 1
 ifloat: 1
 ifloat128: 2
 ildouble: 1
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/Makefile b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/Makefile
index 13125c7..1b8c36a 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -10,9 +10,9 @@
 
 libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \
 			e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \
-			mplog-fma mpa-fma slowexp-fma slowpow-fma \
+			mplog-fma mpa-fma slowexp-fma \
 			sincos32-fma doasin-fma dosincos-fma \
-			halfulp-fma mpexp-fma \
+			mpexp-fma \
 			mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma
 
 CFLAGS-doasin-fma.c = -mfma -mavx2
@@ -22,7 +22,6 @@
 CFLAGS-e_exp-fma.c = -mfma -mavx2
 CFLAGS-e_log-fma.c = -mfma -mavx2
 CFLAGS-e_pow-fma.c = -mfma -mavx2 $(config-cflags-nofma)
-CFLAGS-halfulp-fma.c = -mfma -mavx2
 CFLAGS-mpa-fma.c = -mfma -mavx2
 CFLAGS-mpatan-fma.c = -mfma -mavx2
 CFLAGS-mpatan2-fma.c = -mfma -mavx2
@@ -33,7 +32,6 @@
 CFLAGS-s_atan-fma.c = -mfma -mavx2
 CFLAGS-sincos32-fma.c = -mfma -mavx2
 CFLAGS-slowexp-fma.c = -mfma -mavx2
-CFLAGS-slowpow-fma.c = -mfma -mavx2
 CFLAGS-s_sin-fma.c = -mfma -mavx2
 CFLAGS-s_tan-fma.c = -mfma -mavx2
 
@@ -53,9 +51,9 @@
 
 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
 			e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
-			mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
+			mplog-fma4 mpa-fma4 slowexp-fma4 \
 			sincos32-fma4 doasin-fma4 dosincos-fma4 \
-			halfulp-fma4 mpexp-fma4 \
+			mpexp-fma4 \
 			mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4
 
 CFLAGS-doasin-fma4.c = -mfma4
@@ -65,7 +63,6 @@
 CFLAGS-e_exp-fma4.c = -mfma4
 CFLAGS-e_log-fma4.c = -mfma4
 CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma)
-CFLAGS-halfulp-fma4.c = -mfma4
 CFLAGS-mpa-fma4.c = -mfma4
 CFLAGS-mpatan-fma4.c = -mfma4
 CFLAGS-mpatan2-fma4.c = -mfma4
@@ -76,7 +73,6 @@
 CFLAGS-s_atan-fma4.c = -mfma4
 CFLAGS-sincos32-fma4.c = -mfma4
 CFLAGS-slowexp-fma4.c = -mfma4
-CFLAGS-slowpow-fma4.c = -mfma4
 CFLAGS-s_sin-fma4.c = -mfma4
 CFLAGS-s_tan-fma4.c = -mfma4
 
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
index 6fd4083..73c1e7f 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
@@ -1,6 +1,5 @@
 #define __ieee754_pow __ieee754_pow_fma
 #define __exp1 __exp1_fma
-#define __slowpow __slowpow_fma
 #define SECTION __attribute__ ((section (".text.fma")))
 
 #include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
index 5b3ea8e..8971b65 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
@@ -1,6 +1,5 @@
 #define __ieee754_pow __ieee754_pow_fma4
 #define __exp1 __exp1_fma4
-#define __slowpow __slowpow_fma4
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c
deleted file mode 100644
index 6ca7046..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define __halfulp __halfulp_fma
-#define SECTION __attribute__ ((section (".text.fma")))
-
-#include <sysdeps/ieee754/dbl-64/halfulp.c>
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
deleted file mode 100644
index a00c17c..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define __halfulp __halfulp_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/halfulp.c>
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c
deleted file mode 100644
index 160ed68..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#define __slowpow __slowpow_fma
-#define __add __add_fma
-#define __dbl_mp __dbl_mp_fma
-#define __mpexp __mpexp_fma
-#define __mplog __mplog_fma
-#define __mul __mul_fma
-#define __sub __sub_fma
-#define __halfulp __halfulp_fma
-#define SECTION __attribute__ ((section (".text.fma")))
-
-#include <sysdeps/ieee754/dbl-64/slowpow.c>
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
deleted file mode 100644
index 69d6982..0000000
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#define __slowpow __slowpow_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __mpexp __mpexp_fma4
-#define __mplog __mplog_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define __halfulp __halfulp_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/slowpow.c>
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/nptl/pthreaddef.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/nptl/pthreaddef.h
index 036deb5..7cf6677 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/nptl/pthreaddef.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/nptl/pthreaddef.h
@@ -41,4 +41,6 @@
 
 /* Location of current stack frame.  The frame pointer is not usable.  */
 #define CURRENT_STACK_FRAME \
-  ({ register char *frame __asm__("rsp"); frame; })
+  ({ register void * p__ __asm__("rsp"); \
+     asm volatile("" : "=r" (p__)); \
+     p__; })
diff --git a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/stackinfo.h b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/stackinfo.h
index f7a5672..1301c5e 100644
--- a/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/stackinfo.h
+++ b/google3/third_party/grte/v5_src/glibc-2.27/sysdeps/x86_64/stackinfo.h
@@ -34,7 +34,9 @@
    for which they need to act as barriers as well, hence the additional
    (unnecessary) parameters.  */
 #define stackinfo_get_sp() \
-  ({ void *p__; asm volatile ("mov %%" RSP_LP ", %0" : "=r" (p__)); p__; })
+  ({ register void * p__ __asm__(RSP_LP); \
+     asm volatile("" : "=r" (p__)); \
+     p__; })
 #define stackinfo_sub_sp(ptr) \
   ({ ptrdiff_t d__;						\
      asm volatile ("sub %%" RSP_LP " , %0" : "=r" (d__) : "0" (ptr));	\